1 /*
2 * Copyright (c) 2011-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38
39 #include <mach/boolean.h>
40 #include <kern/thread.h>
41 #include <kern/sched.h>
42 #include <kern/zalloc.h>
43 #include <kern/kalloc.h>
44 #include <kern/ledger.h>
45 #include <kern/misc_protos.h>
46 #include <kern/spl.h>
47 #include <kern/xpr.h>
48 #include <kern/trustcache.h>
49
50 #include <os/overflow.h>
51
52 #include <vm/pmap.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_protos.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_pageout.h>
59 #include <vm/cpm.h>
60
61 #include <libkern/img4/interface.h>
62 #include <libkern/section_keywords.h>
63
64 #include <machine/atomic.h>
65 #include <machine/thread.h>
66 #include <machine/lowglobals.h>
67
68 #include <arm/caches_internal.h>
69 #include <arm/cpu_data.h>
70 #include <arm/cpu_data_internal.h>
71 #include <arm/cpu_capabilities.h>
72 #include <arm/cpu_number.h>
73 #include <arm/machine_cpu.h>
74 #include <arm/misc_protos.h>
75 #include <arm/trap.h>
76
77 #if (__ARM_VMSA__ > 7)
78 #include <arm64/proc_reg.h>
79 #include <pexpert/arm64/boot.h>
80 #if CONFIG_PGTRACE
81 #include <stdint.h>
82 #include <arm64/pgtrace.h>
83 #if CONFIG_PGTRACE_NONKEXT
84 #include <arm64/pgtrace_decoder.h>
85 #endif // CONFIG_PGTRACE_NONKEXT
86 #endif
87 #endif
88
89 #include <pexpert/device_tree.h>
90
91 #include <san/kasan.h>
92 #include <sys/cdefs.h>
93
94
95 #if MACH_ASSERT
96 int vm_footprint_suspend_allowed = 1;
97
98 extern int pmap_ledgers_panic;
99 extern int pmap_ledgers_panic_leeway;
100
101 int pmap_stats_assert = 1;
102 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
103 MACRO_BEGIN \
104 if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
105 assertf(cond, fmt, ##__VA_ARGS__); \
106 MACRO_END
107 #else /* MACH_ASSERT */
108 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
109 #endif /* MACH_ASSERT */
110
111 #if DEVELOPMENT || DEBUG
112 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
113 (current_thread()->pmap_footprint_suspended)
114 #else /* DEVELOPMENT || DEBUG */
115 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
116 #endif /* DEVELOPMENT || DEBUG */
117
118
119
120 #define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
121 #define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
122
123
124 #if DEVELOPMENT || DEBUG
125 int panic_on_unsigned_execute = 0;
126 #endif /* DEVELOPMENT || DEBUG */
127
128
129 /* Virtual memory region for early allocation */
130 #if (__ARM_VMSA__ == 7)
131 #define VREGION1_START (VM_HIGH_KERNEL_WINDOW & ~ARM_TT_L1_PT_OFFMASK)
132 #else
133 #define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
134 #define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
135 #endif
136 #define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
137
138 extern unsigned int not_in_kdp;
139
140 extern vm_offset_t first_avail;
141
142 extern pmap_paddr_t avail_start;
143 extern pmap_paddr_t avail_end;
144
145 extern vm_offset_t virtual_space_start; /* Next available kernel VA */
146 extern vm_offset_t virtual_space_end; /* End of kernel address space */
147 extern vm_offset_t static_memory_end;
148
149 extern int hard_maxproc;
150
151 #if (__ARM_VMSA__ > 7)
152 /* The number of address bits one TTBR can cover. */
153 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
154
155 /*
156 * The bounds on our TTBRs. These are for sanity checking that
157 * an address is accessible by a TTBR before we attempt to map it.
158 */
159 #define ARM64_TTBR0_MIN_ADDR (0ULL)
160 #define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
161 #define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
162 #define ARM64_TTBR1_MAX_ADDR (~0ULL)
163
164 /* The level of the root of a page table. */
165 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
166
167 /* The number of entries in the root TT of a page table. */
168 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
169 #else
170 const uint64_t arm64_root_pgtable_level = 0;
171 const uint64_t arm64_root_pgtable_num_ttes = 0;
172 #endif
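/*
 * Worked example (illustrative, not in the original source; the concrete
 * values of T0SZ_BOOT and TTE_SHIFT are configuration-dependent assumptions):
 * with 16KB pages (ARM_PGSHIFT == 14), 8-byte TTEs (TTE_SHIFT == 3, giving 11
 * index bits per level) and T0SZ_BOOT == 25 (a 39-bit address space),
 *
 *     PGTABLE_ADDR_BITS           = 64 - 25              = 39
 *     arm64_root_pgtable_level    = 3 - ((39-1-14) / 11) = 3 - 2 = 1
 *     arm64_root_pgtable_num_ttes = 2 << ((39-1-14) % 11) = 2 << 2 = 8
 *
 * i.e. translation starts at an L1 table holding just 8 TTEs.
 */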
173
174 struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
175 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
176
177 struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* store pt pages */
178 vm_object_t pmap_object = &pmap_object_store;
179
180 static struct zone *pmap_zone; /* zone of pmap structures */
181
182 decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA)
183 unsigned int pmap_stamp MARK_AS_PMAP_DATA;
184 queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
185
186 decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA)
187 queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
188
189 decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA)
190
191 typedef struct page_free_entry {
192 struct page_free_entry *next;
193 } page_free_entry_t;
194
195 #define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
196
197 page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
198 unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
199 unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
200
201
202 typedef struct tt_free_entry {
203 struct tt_free_entry *next;
204 } tt_free_entry_t;
205
206 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
207
208 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
209 unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
210 unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
211 #define FREE_PAGE_SIZE_TT_MAX 4
212 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
213 unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
214 unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
215 #define FREE_TWO_PAGE_SIZE_TT_MAX 4
216 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
217 unsigned int free_tt_count MARK_AS_PMAP_DATA;
218 unsigned int free_tt_max MARK_AS_PMAP_DATA;
219
220 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
221
222 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
223 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
224 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
225
226 unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
227 unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
228 unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
229 unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
230 unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
231 unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
232 unsigned int inuse_pmap_pages_count = 0; /* debugging */
233
234 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
235 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
236
237 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
238 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
239
240 #if DEVELOPMENT || DEBUG
241 int nx_enabled = 1; /* enable no-execute protection */
242 int allow_data_exec = 0; /* No apps may execute data */
243 int allow_stack_exec = 0; /* No apps may execute from the stack */
244 #else /* DEVELOPMENT || DEBUG */
245 const int nx_enabled = 1; /* enable no-execute protection */
246 const int allow_data_exec = 0; /* No apps may execute data */
247 const int allow_stack_exec = 0; /* No apps may execute from the stack */
248 #endif /* DEVELOPMENT || DEBUG */
249
250 /*
251 * pv_entry_t - structure to track the active mappings for a given page
252 */
253 typedef struct pv_entry {
254 struct pv_entry *pve_next; /* next alias */
255 pt_entry_t *pve_ptep; /* page table entry */
256 #if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
257 /* For the newer ARMv7k ABI, where 64-bit types are 64-bit aligned but pointers
258 * are 32-bit:
259 * since pt_desc is 64-bit aligned and we often cast from pv_entry to
260 * pt_desc, force 8-byte alignment on pv_entry as well.
261 */
262 } __attribute__ ((aligned(8))) pv_entry_t;
263 #else
264 } pv_entry_t;
265 #endif
266
267 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
268
269 /*
270 * PMAP LEDGERS:
271 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
272 * as a marker for pages mapped through an "alternate accounting" mapping.
273 * These macros set, clear and test for this marker and extract the actual
274 * value of the "pve_next" pointer.
275 */
276 #define PVE_NEXT_ALTACCT ((uintptr_t) 0x1)
277 #define PVE_NEXT_SET_ALTACCT(pve_next_p) \
278 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
279 PVE_NEXT_ALTACCT)
280 #define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
281 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
282 ~PVE_NEXT_ALTACCT)
283 #define PVE_NEXT_IS_ALTACCT(pve_next) \
284 ((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
285 #define PVE_NEXT_PTR(pve_next) \
286 ((struct pv_entry *)(((uintptr_t) (pve_next)) & \
287 ~PVE_NEXT_ALTACCT))
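/*
 * Illustrative sketch (not in the original source): because pv_entry_t is at
 * least 4-byte aligned, bit 0 of a "pve_next" pointer is normally zero, so it
 * can carry the "alternate accounting" marker without losing the pointer
 * value, which PVE_NEXT_PTR() recovers by masking the bit back off:
 *
 *     pv_entry_t *pvep = ...;                          // some mapping's pv_entry
 *     PVE_NEXT_SET_ALTACCT(&pvep->pve_next);           // mark as alt-accounted
 *     if (PVE_NEXT_IS_ALTACCT(pvep->pve_next)) {       // test the marker
 *             ...
 *     }
 *     pv_entry_t *next = PVE_NEXT_PTR(pvep->pve_next); // untagged pointer
 */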
288 #if MACH_ASSERT
289 static void pmap_check_ledgers(pmap_t pmap);
290 #else
291 static inline void
292 pmap_check_ledgers(__unused pmap_t pmap)
293 {
294 }
295 #endif /* MACH_ASSERT */
296
297 SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table; /* array of pv entry pointers */
298
299 pv_entry_t *pv_free_list MARK_AS_PMAP_DATA;
300 pv_entry_t *pv_kern_free_list MARK_AS_PMAP_DATA;
301 decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA)
302 decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA)
303
304 decl_simple_lock_data(, phys_backup_lock)
305
306 /*
307 * pt_desc - structure to keep info on page assigned to page tables
308 */
309 #if (__ARM_VMSA__ == 7)
310 #define PT_INDEX_MAX 1
311 #else
312 #if (ARM_PGSHIFT == 14)
313 #define PT_INDEX_MAX 1
314 #else
315 #define PT_INDEX_MAX 4
316 #endif
317 #endif
318
319 #define PT_DESC_REFCOUNT 0x4000U
320 #define PT_DESC_IOMMU_REFCOUNT 0x8000U
321
322 typedef struct pt_desc {
323 queue_chain_t pt_page;
324 struct {
325 /*
326 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
327 * For leaf pagetables, should reflect the number of non-empty PTEs
328 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
329 */
330 unsigned short refcnt;
331 /*
332 * For non-leaf pagetables, should be 0
333 * For leaf pagetables, should reflect the number of wired entries
334 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
335 */
336 unsigned short wiredcnt;
337 } pt_cnt[PT_INDEX_MAX];
338 union {
339 struct pmap *pmap;
340 };
341 struct {
342 vm_offset_t va;
343 } pt_map[PT_INDEX_MAX];
344 } pt_desc_t;
345
346
347 #define PTD_ENTRY_NULL ((pt_desc_t *) 0)
348
349 SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
350
351 pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
352 SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
353 unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
354 decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA)
355
356 /*
357 * physical page attribute
358 */
359 typedef u_int16_t pp_attr_t;
360
361 #define PP_ATTR_WIMG_MASK 0x003F
362 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
363
364 #define PP_ATTR_REFERENCED 0x0040
365 #define PP_ATTR_MODIFIED 0x0080
366
367 #define PP_ATTR_INTERNAL 0x0100
368 #define PP_ATTR_REUSABLE 0x0200
369 #define PP_ATTR_ALTACCT 0x0400
370 #define PP_ATTR_NOENCRYPT 0x0800
371
372 #define PP_ATTR_REFFAULT 0x1000
373 #define PP_ATTR_MODFAULT 0x2000
374
375
376 SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
377
378 typedef struct pmap_io_range {
379 uint64_t addr;
380 uint32_t len;
381 uint32_t wimg; // treated as pp_attr_t
382 } __attribute__((packed)) pmap_io_range_t;
383
384 SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table;
385
386 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
387 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
388
389 SECURITY_READ_ONLY_LATE(pmap_paddr_t) io_rgn_start = 0;
390 SECURITY_READ_ONLY_LATE(pmap_paddr_t) io_rgn_end = 0;
391 SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
392
393 SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
394
395 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
396 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;
397
398 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
399 #if defined(__arm64__)
400 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
401 #endif
402
403 /* free address spaces (1 means free) */
404 static uint32_t asid_bitmap[MAX_ASID / (sizeof(uint32_t) * NBBY)] MARK_AS_PMAP_DATA;
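/*
 * Sizing note (not in the original source; the value of MAX_ASID is an
 * assumption for illustration): each bit tracks one ASID, so the array has
 * MAX_ASID / 32 elements.  With, say, MAX_ASID == 256 that is 8 uint32_t
 * words covering ASIDs 0..255, where a set bit means the ASID is free.
 */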
405
406 #if (__ARM_VMSA__ > 7)
407 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
408 #endif
409
410
411 #define pa_index(pa) \
412 (atop((pa) - vm_first_phys))
413
414 #define pai_to_pvh(pai) \
415 (&pv_head_table[pai])
416
417 #define pa_valid(x) \
418 ((x) >= vm_first_phys && (x) < vm_last_phys)
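/*
 * Illustrative sketch (not in the original source): the usual path from a
 * physical address to its pv_head_table slot, as used throughout this file
 * (LOCK_PVH/UNLOCK_PVH are defined further below):
 *
 *     pmap_paddr_t pa = ...;
 *     if (pa_valid(pa)) {                   // pmap-managed physical memory?
 *             int pai = (int)pa_index(pa);  // index of the physical page
 *             pv_entry_t **pvh = pai_to_pvh(pai);
 *             LOCK_PVH(pai);
 *             ... inspect or modify the mapping list headed at pvh ...
 *             UNLOCK_PVH(pai);
 *     }
 */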
419
420 /* PTE Define Macros */
421
422 #define pte_is_wired(pte) \
423 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
424
425 #define pte_set_wired(ptep, wired) \
426 do { \
427 SInt16 *ptd_wiredcnt_ptr; \
428 ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->pt_cnt[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
429 if (wired) { \
430 *ptep |= ARM_PTE_WIRED; \
431 OSAddAtomic16(1, ptd_wiredcnt_ptr); \
432 } else { \
433 *ptep &= ~ARM_PTE_WIRED; \
434 OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
435 } \
436 } while(0)
437
438 #define pte_was_writeable(pte) \
439 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
440
441 #define pte_set_was_writeable(pte, was_writeable) \
442 do { \
443 if ((was_writeable)) { \
444 (pte) |= ARM_PTE_WRITEABLE; \
445 } else { \
446 (pte) &= ~ARM_PTE_WRITEABLE; \
447 } \
448 } while(0)
449
450 /* PVE Define Macros */
451
452 #define pve_next(pve) \
453 ((pve)->pve_next)
454
455 #define pve_link_field(pve) \
456 (&pve_next(pve))
457
458 #define pve_link(pp, e) \
459 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
460
461 #define pve_unlink(pp, e) \
462 (pve_next(pp) = pve_next(e))
463
464 /* bits held in the ptep pointer field */
465
466 #define pve_get_ptep(pve) \
467 ((pve)->pve_ptep)
468
469 #define pve_set_ptep(pve, ptep_new) \
470 do { \
471 (pve)->pve_ptep = (ptep_new); \
472 } while (0)
473
474 /* PTEP Define Macros */
475
476 #if (__ARM_VMSA__ == 7)
477
478 #define ARM_PT_DESC_INDEX_MASK 0x00000
479 #define ARM_PT_DESC_INDEX_SHIFT 0
480
481 /*
482 * mask for page descriptor index: 4MB per page table
483 */
484 #define ARM_TT_PT_INDEX_MASK 0xfffU /* mask for page descriptor index: 4MB per page table */
485
486 /*
487 * Shift value used for reconstructing the virtual address for a PTE.
488 */
489 #define ARM_TT_PT_ADDR_SHIFT (10U)
490
491 #define ptep_get_va(ptep) \
492 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~0xFFF))))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
493
494 #define ptep_get_pmap(ptep) \
495 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~0xFFF))))))))->pmap))
496
497 #else
498
499 #if (ARM_PGSHIFT == 12)
500 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
501 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
502 /*
503 * mask for page descriptor index: 2MB per page table
504 */
505 #define ARM_TT_PT_INDEX_MASK (0x0fffULL)
506 /*
507 * Shift value used for reconstructing the virtual address for a PTE.
508 */
509 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
510
511 /* TODO: Give this a better name/documentation than "other" */
512 #define ARM_TT_PT_OTHER_MASK (0x0fffULL)
513
514 #else
515
516 #define ARM_PT_DESC_INDEX_MASK (0x00000)
517 #define ARM_PT_DESC_INDEX_SHIFT (0)
518 /*
519 * mask for page descriptor index: 32MB per page table
520 */
521 #define ARM_TT_PT_INDEX_MASK (0x3fffULL)
522 /*
523 * Shift value used for reconstructing the virtual address for a PTE.
524 */
525 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
526
527 /* TODO: Give this a better name/documentation than "other" */
528 #define ARM_TT_PT_OTHER_MASK (0x3fffULL)
529 #endif
530
531 #define ARM_PT_DESC_INDEX(ptep) \
532 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
533
534 #define ptep_get_va(ptep) \
535 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK))))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
536
537 #define ptep_get_pmap(ptep) \
538 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK))))))))->pmap))
539
540 #endif
541
542 #define ARM_PT_DESC_INDEX(ptep) \
543 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
544
545 #define ptep_get_ptd(ptep) \
546 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
547
548
549 /* PVH Define Macros */
550
551 /* pvhead type */
552 #define PVH_TYPE_NULL 0x0UL
553 #define PVH_TYPE_PVEP 0x1UL
554 #define PVH_TYPE_PTEP 0x2UL
555 #define PVH_TYPE_PTDP 0x3UL
556
557 #define PVH_TYPE_MASK (0x3UL)
558
559 #ifdef __arm64__
560
561 #define PVH_FLAG_IOMMU 0x4UL
562 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63)
563 #define PVH_FLAG_CPU (1ULL << 62)
564 #define PVH_LOCK_BIT 61
565 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
566 #define PVH_FLAG_EXEC (1ULL << 60)
567 #define PVH_FLAG_LOCKDOWN (1ULL << 59)
568 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)
569
570 #else /* !__arm64__ */
571
572 #define PVH_LOCK_BIT 31
573 #define PVH_FLAG_LOCK (1UL << PVH_LOCK_BIT)
574 #define PVH_HIGH_FLAGS PVH_FLAG_LOCK
575
576 #endif
577
578 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
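/*
 * Illustrative layout sketch (not in the original source), for the __arm64__
 * case: a pv_head_table entry is one 64-bit word packing a pointer together
 * with type and flag bits, roughly
 *
 *     bit 63: PVH_FLAG_IOMMU_TABLE    bit 62: PVH_FLAG_CPU
 *     bit 61: PVH_FLAG_LOCK           bit 60: PVH_FLAG_EXEC
 *     bit 59: PVH_FLAG_LOCKDOWN
 *     bits 58..2: the remaining pointer bits of the list head
 *     bits  1..0: PVH_TYPE_* discriminator
 *
 * Since kernel pointers have their top bits set anyway, pvh_ptep() and
 * pvh_list() below mask off the type bits and OR PVH_HIGH_FLAGS back in to
 * reconstruct a canonical kernel address.
 */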
579
580 #define pvh_test_type(h, b) \
581 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
582
583 #define pvh_ptep(h) \
584 ((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
585
586 #define pvh_list(h) \
587 ((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
588
589 #define pvh_get_flags(h) \
590 (*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
591
592 #define pvh_set_flags(h, f) \
593 do { \
594 __c11_atomic_store((_Atomic vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
595 memory_order_relaxed); \
596 } while (0)
597
598 #define pvh_update_head(h, e, t) \
599 do { \
600 assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
601 __c11_atomic_store((_Atomic vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
602 memory_order_relaxed); \
603 } while (0)
604
605 #define pvh_update_head_unlocked(h, e, t) \
606 do { \
607 assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
608 *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
609 } while (0)
610
611 #define pvh_add(h, e) \
612 do { \
613 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
614 pve_next(e) = pvh_list(h); \
615 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
616 } while (0)
617
618 #define pvh_remove(h, p, e) \
619 do { \
620 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
621 if ((p) == (h)) { \
622 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
623 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
624 } else { \
625 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
626 } \
627 } else { \
628 /* \
629 * PMAP LEDGERS: \
630 * preserve the "alternate accounting" bit \
631 * when updating "p" (the previous entry's \
632 * "pve_next"). \
633 */ \
634 boolean_t __is_altacct; \
635 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
636 *(p) = PVE_NEXT_PTR(pve_next((e))); \
637 if (__is_altacct) { \
638 PVE_NEXT_SET_ALTACCT((p)); \
639 } else { \
640 PVE_NEXT_CLR_ALTACCT((p)); \
641 } \
642 } \
643 } while (0)
644
645
646 /* PPATTR Define Macros */
647
648 #define ppattr_set_bits(h, b) \
649 do { \
650 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
651 } while (0)
652
653 #define ppattr_clear_bits(h, b) \
654 do { \
655 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
656 } while (0)
657
658 #define ppattr_test_bits(h, b) \
659 ((*(pp_attr_t *)(h) & (b)) == (b))
660
661 #define pa_set_bits(x, b) \
662 do { \
663 if (pa_valid(x)) \
664 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
665 (b)); \
666 } while (0)
667
668 #define pa_test_bits(x, b) \
669 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
670 (b)) : FALSE)
671
672 #define pa_clear_bits(x, b) \
673 do { \
674 if (pa_valid(x)) \
675 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
676 (b)); \
677 } while (0)
678
679 #define pa_set_modify(x) \
680 pa_set_bits(x, PP_ATTR_MODIFIED)
681
682 #define pa_clear_modify(x) \
683 pa_clear_bits(x, PP_ATTR_MODIFIED)
684
685 #define pa_set_reference(x) \
686 pa_set_bits(x, PP_ATTR_REFERENCED)
687
688 #define pa_clear_reference(x) \
689 pa_clear_bits(x, PP_ATTR_REFERENCED)
690
691
692 #define IS_INTERNAL_PAGE(pai) \
693 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
694 #define SET_INTERNAL_PAGE(pai) \
695 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
696 #define CLR_INTERNAL_PAGE(pai) \
697 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
698
699 #define IS_REUSABLE_PAGE(pai) \
700 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
701 #define SET_REUSABLE_PAGE(pai) \
702 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
703 #define CLR_REUSABLE_PAGE(pai) \
704 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
705
706 #define IS_ALTACCT_PAGE(pai, pve_p) \
707 (((pve_p) == NULL) \
708 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
709 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
710 #define SET_ALTACCT_PAGE(pai, pve_p) \
711 if ((pve_p) == NULL) { \
712 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
713 } else { \
714 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
715 }
716 #define CLR_ALTACCT_PAGE(pai, pve_p) \
717 if ((pve_p) == NULL) { \
718 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
719 } else { \
720 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
721 }
722
723 #define IS_REFFAULT_PAGE(pai) \
724 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
725 #define SET_REFFAULT_PAGE(pai) \
726 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
727 #define CLR_REFFAULT_PAGE(pai) \
728 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
729
730 #define IS_MODFAULT_PAGE(pai) \
731 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
732 #define SET_MODFAULT_PAGE(pai) \
733 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
734 #define CLR_MODFAULT_PAGE(pai) \
735 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
736
737 #define tte_get_ptd(tte) \
738 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
739
740
741 #if (__ARM_VMSA__ == 7)
742
743 #define tte_index(pmap, addr) \
744 ttenum((addr))
745
746 #else
747
748 #define tt0_index(pmap, addr) \
749 (((addr) & ARM_TT_L0_INDEX_MASK) >> ARM_TT_L0_SHIFT)
750
751 #define tt1_index(pmap, addr) \
752 (((addr) & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT)
753
754 #define tt2_index(pmap, addr) \
755 (((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)
756
757 #define tt3_index(pmap, addr) \
758 (((addr) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)
759
760 #define tte_index(pmap, addr) \
761 (((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)
762
763 #endif
764
765 /*
766 * Lock on pmap system
767 */
768
769 lck_grp_t pmap_lck_grp;
770
771 #define PMAP_LOCK_INIT(pmap) { \
772 simple_lock_init(&(pmap)->lock, 0); \
773 }
774
775 #define PMAP_LOCK(pmap) { \
776 pmap_simple_lock(&(pmap)->lock); \
777 }
778
779 #define PMAP_UNLOCK(pmap) { \
780 pmap_simple_unlock(&(pmap)->lock); \
781 }
782
783 #if MACH_ASSERT
784 #define PMAP_ASSERT_LOCKED(pmap) { \
785 simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
786 }
787 #else
788 #define PMAP_ASSERT_LOCKED(pmap)
789 #endif
790
791 #if defined(__arm64__)
792 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
793 #else
794 #define PVH_LOCK_WORD 0
795 #endif
796
797 #define ASSERT_PVH_LOCKED(index) \
798 do { \
799 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
800 } while (0)
801
802 #define LOCK_PVH(index) \
803 do { \
804 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
805 } while (0)
806
807 #define UNLOCK_PVH(index) \
808 do { \
809 ASSERT_PVH_LOCKED(index); \
810 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
811 } while (0)
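/*
 * Worked example (not in the original source): on __arm64__ the lock lives in
 * bit 61 of the 64-bit pv_head entry, but pmap_lock_bit()/pmap_unlock_bit()
 * operate on a 32-bit word.  The macros above therefore select the upper word
 * (PVH_LOCK_WORD == 1 on a little-endian layout) and pass the bit position
 * within that word:
 *
 *     (uint32_t *)&pv_head_table[index] + 1     // high 32 bits of the entry
 *     PVH_LOCK_BIT - (1 * 32) == 61 - 32 == 29  // lock bit inside that word
 */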
812
813 #define PMAP_UPDATE_TLBS(pmap, s, e) { \
814 flush_mmu_tlb_region_asid_async(s, (unsigned)(e - s), pmap); \
815 sync_tlb_flush(); \
816 }
817
818 #ifdef __ARM_L1_PTW__
819
820 #define FLUSH_PTE_RANGE(spte, epte) \
821 __builtin_arm_dmb(DMB_ISH);
822
823 #define FLUSH_PTE(pte_p) \
824 __builtin_arm_dmb(DMB_ISH);
825
826 #define FLUSH_PTE_STRONG(pte_p) \
827 __builtin_arm_dsb(DSB_ISH);
828
829 #define FLUSH_PTE_RANGE_STRONG(spte, epte) \
830 __builtin_arm_dsb(DSB_ISH);
831
832 #else /* __ARM_L1_PTW */
833
834 #define FLUSH_PTE_RANGE(spte, epte) \
835 CleanPoU_DcacheRegion((vm_offset_t)spte, \
836 (vm_offset_t)epte - (vm_offset_t)spte);
837
838 #define FLUSH_PTE(pte_p) \
839 __unreachable_ok_push \
840 if (TEST_PAGE_RATIO_4) \
841 FLUSH_PTE_RANGE((pte_p), (pte_p) + 4); \
842 else \
843 FLUSH_PTE_RANGE((pte_p), (pte_p) + 1); \
844 CleanPoU_DcacheRegion((vm_offset_t)pte_p, sizeof(pt_entry_t)); \
845 __unreachable_ok_pop
846
847 #define FLUSH_PTE_STRONG(pte_p) FLUSH_PTE(pte_p)
848
849 #define FLUSH_PTE_RANGE_STRONG(spte, epte) FLUSH_PTE_RANGE(spte, epte)
850
851 #endif /* !defined(__ARM_L1_PTW) */
852
853 #define WRITE_PTE_FAST(pte_p, pte_entry) \
854 __unreachable_ok_push \
855 if (TEST_PAGE_RATIO_4) { \
856 if (((unsigned)(pte_p)) & 0x1f) \
857 panic("WRITE_PTE\n"); \
858 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
859 *(pte_p) = (pte_entry); \
860 *((pte_p)+1) = (pte_entry); \
861 *((pte_p)+2) = (pte_entry); \
862 *((pte_p)+3) = (pte_entry); \
863 } else { \
864 *(pte_p) = (pte_entry); \
865 *((pte_p)+1) = (pte_entry) | 0x1000; \
866 *((pte_p)+2) = (pte_entry) | 0x2000; \
867 *((pte_p)+3) = (pte_entry) | 0x3000; \
868 } \
869 } else { \
870 *(pte_p) = (pte_entry); \
871 } \
872 __unreachable_ok_pop
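/*
 * Worked example (not in the original source): with a 16KB VM page size on
 * 4KB hardware pages (TEST_PAGE_RATIO_4), one VM page is backed by four
 * consecutive 4KB PTEs, so a non-empty template is replicated with 0x1000,
 * 0x2000 and 0x3000 added to the output address: a template mapping physical
 * 0x80004000 yields PTEs for 0x80004000, 0x80005000, 0x80006000 and
 * 0x80007000.  The (pte_p & 0x1f) check enforces that pte_p is the first of a
 * naturally aligned group of four 8-byte entries.
 */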
873
874 #define WRITE_PTE(pte_p, pte_entry) \
875 WRITE_PTE_FAST(pte_p, pte_entry); \
876 FLUSH_PTE(pte_p);
877
878 #define WRITE_PTE_STRONG(pte_p, pte_entry) \
879 WRITE_PTE_FAST(pte_p, pte_entry); \
880 FLUSH_PTE_STRONG(pte_p);
881
882 /*
883 * Other useful macros.
884 */
885 #define current_pmap() \
886 (vm_map_pmap(current_thread()->map))
887
888
889 #define VALIDATE_USER_PMAP(x)
890 #define VALIDATE_PMAP(x)
891 #define VALIDATE_LEDGER(x)
892
893
894 #if DEVELOPMENT || DEBUG
895
896 /*
897 * Trace levels are controlled by a bitmask in which each
898 * level can be enabled/disabled by the (1<<level) position
899 * in the boot arg
900 * Level 1: pmap lifecycle (create/destroy/switch)
901 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
902 * Level 3: internal state management (tte/attributes/fast-fault)
903 */
904
905 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
906
907 #define PMAP_TRACE(level, ...) \
908 if (__improbable((1 << (level)) & pmap_trace_mask)) { \
909 KDBG_RELEASE(__VA_ARGS__); \
910 }
911 #else
912
913 #define PMAP_TRACE(level, ...)
914
915 #endif
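/*
 * Example (not in the original source): a mask value of
 * (1 << 1) | (1 << 2) == 0x6 in pmap_trace_mask enables pmap lifecycle
 * (level 1) and mapping lifecycle (level 2) tracing while leaving internal
 * state management (level 3) tracing disabled.
 */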
916
917
918 /*
919 * Internal function prototypes (forward declarations).
920 */
921
922 static void pv_init(
923 void);
924
925 static boolean_t pv_alloc(
926 pmap_t pmap,
927 unsigned int pai,
928 pv_entry_t **pvepp);
929
930 static void pv_free(
931 pv_entry_t *pvep);
932
933 static void pv_list_free(
934 pv_entry_t *pvehp,
935 pv_entry_t *pvetp,
936 unsigned int cnt);
937
938 static void ptd_bootstrap(
939 pt_desc_t *ptdp, unsigned int ptd_cnt);
940
941 static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);
942
943 static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);
944
945 static void ptd_deallocate(pt_desc_t *ptdp);
946
947 static void ptd_init(
948 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
949
950 static void pmap_zone_init(
951 void);
952
953 static void pmap_set_reference(
954 ppnum_t pn);
955
956 ppnum_t pmap_vtophys(
957 pmap_t pmap, addr64_t va);
958
959 void pmap_switch_user_ttb(
960 pmap_t pmap);
961
962 static void flush_mmu_tlb_region_asid_async(
963 vm_offset_t va, unsigned length, pmap_t pmap);
964
965 static kern_return_t pmap_expand(
966 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
967
968 static int pmap_remove_range(
969 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
970
971 static int pmap_remove_range_options(
972 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, int);
973
974 static tt_entry_t *pmap_tt1_allocate(
975 pmap_t, vm_size_t, unsigned int);
976
977 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
978
979 static void pmap_tt1_deallocate(
980 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
981
982 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
983
984 static kern_return_t pmap_tt_allocate(
985 pmap_t, tt_entry_t **, unsigned int, unsigned int);
986
987 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
988
989 static void pmap_tte_deallocate(
990 pmap_t, tt_entry_t *, unsigned int);
991
992 #define PMAP_TT_L1_LEVEL 0x1
993 #define PMAP_TT_L2_LEVEL 0x2
994 #define PMAP_TT_L3_LEVEL 0x3
995 #if (__ARM_VMSA__ == 7)
996 #define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
997 #else
998 #define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
999 #endif
1000
1001 #ifdef __ARM64_PMAP_SUBPAGE_L1__
1002 #if (__ARM_VMSA__ <= 7)
1003 #error This is not supported for old-style page tables
1004 #endif
1005 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
1006 #else
1007 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
1008 #endif
1009
1010 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
1011 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
1012 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
1013
1014 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1015
1016 #if (__ARM_VMSA__ > 7)
1017
1018 static inline tt_entry_t *pmap_tt1e(
1019 pmap_t, vm_map_address_t);
1020
1021 static inline tt_entry_t *pmap_tt2e(
1022 pmap_t, vm_map_address_t);
1023
1024 static inline pt_entry_t *pmap_tt3e(
1025 pmap_t, vm_map_address_t);
1026
1027 static void pmap_unmap_sharedpage(
1028 pmap_t pmap);
1029
1030 static boolean_t
1031 pmap_is_64bit(pmap_t);
1032
1033
1034 #endif
1035 static inline tt_entry_t *pmap_tte(
1036 pmap_t, vm_map_address_t);
1037
1038 static inline pt_entry_t *pmap_pte(
1039 pmap_t, vm_map_address_t);
1040
1041 static void pmap_update_cache_attributes_locked(
1042 ppnum_t, unsigned);
1043
1044 boolean_t arm_clear_fast_fault(
1045 ppnum_t ppnum,
1046 vm_prot_t fault_type);
1047
1048 static pmap_paddr_t pmap_pages_reclaim(
1049 void);
1050
1051 static kern_return_t pmap_pages_alloc(
1052 pmap_paddr_t *pa,
1053 unsigned size,
1054 unsigned option);
1055
1056 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1057 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1058
1059 static void pmap_pages_free(
1060 pmap_paddr_t pa,
1061 unsigned size);
1062
1063 static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
1064
1065 static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
1066
1067
1068 static void pmap_trim_self(pmap_t pmap);
1069 static void pmap_trim_subord(pmap_t subord);
1070
1071 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1072 static __return_type __function_name##_internal __function_args;
1073
1074 PMAP_SUPPORT_PROTOTYPES(
1075 kern_return_t,
1076 arm_fast_fault, (pmap_t pmap,
1077 vm_map_address_t va,
1078 vm_prot_t fault_type,
1079 boolean_t from_user), ARM_FAST_FAULT_INDEX);
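/*
 * Illustrative expansion (not in the original source): after preprocessing,
 * the invocation above reduces to the forward declaration
 *
 *     static kern_return_t arm_fast_fault_internal(pmap_t pmap,
 *         vm_map_address_t va, vm_prot_t fault_type, boolean_t from_user);
 *
 * Each PMAP_SUPPORT_PROTOTYPES() use declares the _internal variant of a pmap
 * entry point; the __function_index argument is unused by this definition of
 * the macro.
 */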
1080
1081
1082 PMAP_SUPPORT_PROTOTYPES(
1083 boolean_t,
1084 arm_force_fast_fault, (ppnum_t ppnum,
1085 vm_prot_t allow_mode,
1086 int options), ARM_FORCE_FAST_FAULT_INDEX);
1087
1088 PMAP_SUPPORT_PROTOTYPES(
1089 kern_return_t,
1090 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
1091
1092 PMAP_SUPPORT_PROTOTYPES(
1093 kern_return_t,
1094 mapping_replenish, (void), MAPPING_REPLENISH_INDEX);
1095
1096 PMAP_SUPPORT_PROTOTYPES(
1097 boolean_t,
1098 pmap_batch_set_cache_attributes, (ppnum_t pn,
1099 unsigned int cacheattr,
1100 unsigned int page_cnt,
1101 unsigned int page_index,
1102 boolean_t doit,
1103 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
1104
1105 PMAP_SUPPORT_PROTOTYPES(
1106 void,
1107 pmap_change_wiring, (pmap_t pmap,
1108 vm_map_address_t v,
1109 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
1110
1111 PMAP_SUPPORT_PROTOTYPES(
1112 pmap_t,
1113 pmap_create, (ledger_t ledger,
1114 vm_map_size_t size,
1115 boolean_t is_64bit), PMAP_CREATE_INDEX);
1116
1117 PMAP_SUPPORT_PROTOTYPES(
1118 void,
1119 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
1120
1121 PMAP_SUPPORT_PROTOTYPES(
1122 kern_return_t,
1123 pmap_enter_options, (pmap_t pmap,
1124 vm_map_address_t v,
1125 ppnum_t pn,
1126 vm_prot_t prot,
1127 vm_prot_t fault_type,
1128 unsigned int flags,
1129 boolean_t wired,
1130 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
1131
1132 PMAP_SUPPORT_PROTOTYPES(
1133 vm_offset_t,
1134 pmap_extract, (pmap_t pmap,
1135 vm_map_address_t va), PMAP_EXTRACT_INDEX);
1136
1137 PMAP_SUPPORT_PROTOTYPES(
1138 ppnum_t,
1139 pmap_find_phys, (pmap_t pmap,
1140 addr64_t va), PMAP_FIND_PHYS_INDEX);
1141
1142 #if (__ARM_VMSA__ > 7)
1143 PMAP_SUPPORT_PROTOTYPES(
1144 kern_return_t,
1145 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
1146 #endif
1147
1148
1149 PMAP_SUPPORT_PROTOTYPES(
1150 boolean_t,
1151 pmap_is_empty, (pmap_t pmap,
1152 vm_map_offset_t va_start,
1153 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
1154
1155
1156 PMAP_SUPPORT_PROTOTYPES(
1157 unsigned int,
1158 pmap_map_cpu_windows_copy, (ppnum_t pn,
1159 vm_prot_t prot,
1160 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
1161
1162 PMAP_SUPPORT_PROTOTYPES(
1163 kern_return_t,
1164 pmap_nest, (pmap_t grand,
1165 pmap_t subord,
1166 addr64_t vstart,
1167 addr64_t nstart,
1168 uint64_t size), PMAP_NEST_INDEX);
1169
1170 PMAP_SUPPORT_PROTOTYPES(
1171 void,
1172 pmap_page_protect_options, (ppnum_t ppnum,
1173 vm_prot_t prot,
1174 unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
1175
1176 PMAP_SUPPORT_PROTOTYPES(
1177 void,
1178 pmap_protect_options, (pmap_t pmap,
1179 vm_map_address_t start,
1180 vm_map_address_t end,
1181 vm_prot_t prot,
1182 unsigned int options,
1183 void *args), PMAP_PROTECT_OPTIONS_INDEX);
1184
1185 PMAP_SUPPORT_PROTOTYPES(
1186 kern_return_t,
1187 pmap_query_page_info, (pmap_t pmap,
1188 vm_map_offset_t va,
1189 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
1190
1191 PMAP_SUPPORT_PROTOTYPES(
1192 mach_vm_size_t,
1193 pmap_query_resident, (pmap_t pmap,
1194 vm_map_address_t start,
1195 vm_map_address_t end,
1196 mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
1197
1198 PMAP_SUPPORT_PROTOTYPES(
1199 void,
1200 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
1201
1202 PMAP_SUPPORT_PROTOTYPES(
1203 int,
1204 pmap_remove_options, (pmap_t pmap,
1205 vm_map_address_t start,
1206 vm_map_address_t end,
1207 int options), PMAP_REMOVE_OPTIONS_INDEX);
1208
1209 PMAP_SUPPORT_PROTOTYPES(
1210 kern_return_t,
1211 pmap_return, (boolean_t do_panic,
1212 boolean_t do_recurse), PMAP_RETURN_INDEX);
1213
1214 PMAP_SUPPORT_PROTOTYPES(
1215 void,
1216 pmap_set_cache_attributes, (ppnum_t pn,
1217 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
1218
1219 PMAP_SUPPORT_PROTOTYPES(
1220 void,
1221 pmap_update_compressor_page, (ppnum_t pn,
1222 unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
1223
1224 PMAP_SUPPORT_PROTOTYPES(
1225 void,
1226 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
1227
1228 #if MACH_ASSERT
1229 PMAP_SUPPORT_PROTOTYPES(
1230 void,
1231 pmap_set_process, (pmap_t pmap,
1232 int pid,
1233 char *procname), PMAP_SET_PROCESS_INDEX);
1234 #endif
1235
1236 PMAP_SUPPORT_PROTOTYPES(
1237 void,
1238 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
1239
1240 PMAP_SUPPORT_PROTOTYPES(
1241 kern_return_t,
1242 pmap_unnest_options, (pmap_t grand,
1243 addr64_t vaddr,
1244 uint64_t size,
1245 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
1246
1247
1248 PMAP_SUPPORT_PROTOTYPES(
1249 void,
1250 phys_attribute_set, (ppnum_t pn,
1251 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
1252
1253
1254 PMAP_SUPPORT_PROTOTYPES(
1255 void,
1256 phys_attribute_clear, (ppnum_t pn,
1257 unsigned int bits,
1258 int options,
1259 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
1260
1261 PMAP_SUPPORT_PROTOTYPES(
1262 void,
1263 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
1264
1265 PMAP_SUPPORT_PROTOTYPES(
1266 void,
1267 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
1268
1269 PMAP_SUPPORT_PROTOTYPES(
1270 void,
1271 pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
1272
1273
1274 PMAP_SUPPORT_PROTOTYPES(
1275 void,
1276 pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
1277
1278 PMAP_SUPPORT_PROTOTYPES(
1279 void,
1280 pmap_trim, (pmap_t grand,
1281 pmap_t subord,
1282 addr64_t vstart,
1283 addr64_t nstart,
1284 uint64_t size), PMAP_TRIM_INDEX);
1285
1286
1287
1288
1289
1290 void pmap_footprint_suspend(vm_map_t map,
1291 boolean_t suspend);
1292 PMAP_SUPPORT_PROTOTYPES(
1293 void,
1294 pmap_footprint_suspend, (vm_map_t map,
1295 boolean_t suspend),
1296 PMAP_FOOTPRINT_SUSPEND_INDEX);
1297
1298
1299 #if CONFIG_PGTRACE
1300 boolean_t pgtrace_enabled = 0;
1301
1302 typedef struct {
1303 queue_chain_t chain;
1304
1305 /*
1306 * pmap - pmap for below addresses
1307 * ova - original va page address
1308 * cva - clone va addresses for pre, target and post pages
1309 * cva_spte - clone saved ptes
1310 * range - trace range in this map
1311 * cloned - has been cloned or not
1312 */
1313 pmap_t pmap;
1314 vm_map_offset_t ova;
1315 vm_map_offset_t cva[3];
1316 pt_entry_t cva_spte[3];
1317 struct {
1318 pmap_paddr_t start;
1319 pmap_paddr_t end;
1320 } range;
1321 bool cloned;
1322 } pmap_pgtrace_map_t;
1323
1324 static void pmap_pgtrace_init(void);
1325 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
1326 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
1327 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
1328 #endif
1329
1330 #if (__ARM_VMSA__ > 7)
1331 /*
1332 * The low global vector page is mapped at a fixed alias.
1333 * Since the page size is 16k for H8 and newer we map the globals to a 16k
1334 * aligned address. Readers of the globals (e.g. lldb, panic server) need
1335 * to check both addresses anyway for backward compatibility. So for now
1336 * we leave H6 and H7 where they were.
1337 */
1338 #if (ARM_PGSHIFT == 14)
1339 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
1340 #else
1341 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
1342 #endif
1343
1344 #else
1345 #define LOWGLOBAL_ALIAS (0xFFFF1000)
1346 #endif
1347
1348 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1349 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1350 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1351 long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
1352
1353 int pt_fake_zone_index = -1; /* index of pmap fake zone */
1354
1355
1356
1357 /*
1358 * Allocates and initializes a per-CPU data structure for the pmap.
1359 */
1360 MARK_AS_PMAP_TEXT static void
1361 pmap_cpu_data_init_internal(unsigned int cpu_number)
1362 {
1363 pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
1364
1365 pmap_cpu_data->cpu_number = cpu_number;
1366 }
1367
1368 void
1369 pmap_cpu_data_init(void)
1370 {
1371 pmap_cpu_data_init_internal(cpu_number());
1372 }
1373
1374 static void
1375 pmap_cpu_data_array_init(void)
1376 {
1377
1378 pmap_cpu_data_init();
1379 }
1380
1381 pmap_cpu_data_t *
1382 pmap_get_cpu_data(void)
1383 {
1384 pmap_cpu_data_t * pmap_cpu_data = NULL;
1385
1386 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
1387
1388 return pmap_cpu_data;
1389 }
1390
1391
1392 /* TODO */
1393 pmap_paddr_t
1394 pmap_pages_reclaim(
1395 void)
1396 {
1397 boolean_t found_page;
1398 unsigned i;
1399 pt_desc_t *ptdp;
1400
1401
1402 /*
1403 * pmap_pages_reclaim() returns a page by freeing an active pt page.
1404 * To be eligible, a pt page must be assigned to a user pmap, contain no
1405 * wired pte entries, and contain at least one valid pte entry.
1406 *
1407 * In a loop, check for a page in the reclaimed pt page list.
1408 * If one is present, unlink that page and return its physical address.
1409 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
1410 * If one is found, invoke pmap_remove_range() on its pmap and address range,
1411 * then deallocate that pt page; this ends up adding the pt page to the
1412 * reclaimed pt page list.
1413 * If no eligible page is found in the pt page list, panic.
1414 */
1415
1416 pmap_simple_lock(&pmap_pages_lock);
1417 pmap_pages_request_count++;
1418 pmap_pages_request_acum++;
1419
1420 while (1) {
1421 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
1422 page_free_entry_t *page_entry;
1423
1424 page_entry = pmap_pages_reclaim_list;
1425 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
1426 pmap_simple_unlock(&pmap_pages_lock);
1427
1428 return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
1429 }
1430
1431 pmap_simple_unlock(&pmap_pages_lock);
1432
1433 pmap_simple_lock(&pt_pages_lock);
1434 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
1435 found_page = FALSE;
1436
1437 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
1438 if ((ptdp->pmap->nested == FALSE)
1439 && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
1440 assert(ptdp->pmap != kernel_pmap);
1441 unsigned refcnt_acc = 0;
1442 unsigned wiredcnt_acc = 0;
1443
1444 for (i = 0; i < PT_INDEX_MAX; i++) {
1445 if (ptdp->pt_cnt[i].refcnt == PT_DESC_REFCOUNT) {
1446 /* Do not attempt to free a page that contains an L2 table */
1447 refcnt_acc = 0;
1448 break;
1449 }
1450 refcnt_acc += ptdp->pt_cnt[i].refcnt;
1451 wiredcnt_acc += ptdp->pt_cnt[i].wiredcnt;
1452 }
1453 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
1454 found_page = TRUE;
1455 /* Leave ptdp->pmap locked here. We're about to reclaim
1456 * a tt page from it, so we don't want anyone else messing
1457 * with it while we do that. */
1458 break;
1459 }
1460 pmap_simple_unlock(&ptdp->pmap->lock);
1461 }
1462 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
1463 }
1464 if (!found_page) {
1465 panic("pmap_pages_reclaim(): No eligible page in pt_page_list\n");
1466 } else {
1467 int remove_count = 0;
1468 vm_map_address_t va;
1469 pmap_t pmap;
1470 pt_entry_t *bpte, *epte;
1471 pt_entry_t *pte_p;
1472 tt_entry_t *tte_p;
1473 uint32_t rmv_spte = 0;
1474
1475 pmap_simple_unlock(&pt_pages_lock);
1476 pmap = ptdp->pmap;
1477 PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
1478 for (i = 0; i < PT_INDEX_MAX; i++) {
1479 va = ptdp->pt_map[i].va;
1480
1481 /* If the VA is bogus, this may represent an unallocated region
1482 * or one which is in transition (already being freed or expanded).
1483 * Don't try to remove mappings here. */
1484 if (va == (vm_offset_t)-1) {
1485 continue;
1486 }
1487
1488 tte_p = pmap_tte(pmap, va);
1489 if ((tte_p != (tt_entry_t *) NULL)
1490 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
1491 #if (__ARM_VMSA__ == 7)
1492 pte_p = (pt_entry_t *) ttetokv(*tte_p);
1493 bpte = &pte_p[ptenum(va)];
1494 epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
1495 #else
1496 pte_p = (pt_entry_t *) ttetokv(*tte_p);
1497 bpte = &pte_p[tt3_index(pmap, va)];
1498 epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
1499 #endif
1500 /*
1501 * Use PMAP_OPTIONS_REMOVE to clear any
1502 * "compressed" markers and update the
1503 * "compressed" counter in pmap->stats.
1504 * This means that we lose accounting for
1505 * any compressed pages in this range
1506 * but the alternative is to not be able
1507 * to account for their future decompression,
1508 * which could cause the counter to drift
1509 * more and more.
1510 */
1511 remove_count += pmap_remove_range_options(
1512 pmap, va, bpte, epte,
1513 &rmv_spte, PMAP_OPTIONS_REMOVE);
1514 if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0) {
1515 panic("pmap_pages_reclaim(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
1516 }
1517 #if (__ARM_VMSA__ == 7)
1518 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
1519 flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->asid & 0xff));
1520 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->asid & 0xff));
1521 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->asid & 0xff));
1522 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->asid & 0xff));
1523 #else
1524 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
1525 flush_mmu_tlb_entry_async(tlbi_addr(va & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
1526 #endif
1527
1528 if (remove_count > 0) {
1529 #if (__ARM_VMSA__ == 7)
1530 flush_mmu_tlb_region_asid_async(va, 4 * ARM_TT_L1_SIZE, pmap);
1531 #else
1532 flush_mmu_tlb_region_asid_async(va, ARM_TT_L2_SIZE, pmap);
1533 #endif
1534 }
1535 }
1536 }
1537 sync_tlb_flush();
1538 // Undo the lock we grabbed when we found ptdp above
1539 PMAP_UNLOCK(pmap);
1540 }
1541 pmap_simple_lock(&pmap_pages_lock);
1542 }
1543 }
1544
1545
1546 static kern_return_t
1547 pmap_pages_alloc(
1548 pmap_paddr_t *pa,
1549 unsigned size,
1550 unsigned option)
1551 {
1552 vm_page_t m = VM_PAGE_NULL, m_prev;
1553
1554 if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
1555 assert(size == PAGE_SIZE);
1556 *pa = pmap_pages_reclaim();
1557 return KERN_SUCCESS;
1558 }
1559 if (size == PAGE_SIZE) {
1560 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
1561 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
1562 return KERN_RESOURCE_SHORTAGE;
1563 }
1564
1565 VM_PAGE_WAIT();
1566 }
1567 vm_page_lock_queues();
1568 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
1569 vm_page_unlock_queues();
1570 }
1571 if (size == 2 * PAGE_SIZE) {
1572 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
1573 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
1574 return KERN_RESOURCE_SHORTAGE;
1575 }
1576
1577 VM_PAGE_WAIT();
1578 }
1579 }
1580
1581 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
1582
1583 vm_object_lock(pmap_object);
1584 while (m != VM_PAGE_NULL) {
1585 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
1586 m_prev = m;
1587 m = NEXT_PAGE(m_prev);
1588 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
1589 }
1590 vm_object_unlock(pmap_object);
1591
1592 OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
1593 OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
1594
1595 return KERN_SUCCESS;
1596 }
1597
1598
1599 static void
1600 pmap_pages_free(
1601 pmap_paddr_t pa,
1602 unsigned size)
1603 {
1604 pmap_simple_lock(&pmap_pages_lock);
1605
1606 if (pmap_pages_request_count != 0) {
1607 page_free_entry_t *page_entry;
1608
1609 pmap_pages_request_count--;
1610 page_entry = (page_free_entry_t *)phystokv(pa);
1611 page_entry->next = pmap_pages_reclaim_list;
1612 pmap_pages_reclaim_list = page_entry;
1613 pmap_simple_unlock(&pmap_pages_lock);
1614
1615 return;
1616 }
1617
1618 pmap_simple_unlock(&pmap_pages_lock);
1619
1620 vm_page_t m;
1621 pmap_paddr_t pa_max;
1622
1623 OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
1624
1625 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
1626 vm_object_lock(pmap_object);
1627 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
1628 assert(m != VM_PAGE_NULL);
1629 assert(VM_PAGE_WIRED(m));
1630 vm_page_lock_queues();
1631 vm_page_free(m);
1632 vm_page_unlock_queues();
1633 vm_object_unlock(pmap_object);
1634 }
1635 }
1636
1637 static inline void
1638 PMAP_ZINFO_PALLOC(
1639 pmap_t pmap, int bytes)
1640 {
1641 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
1642 }
1643
1644 static inline void
1645 PMAP_ZINFO_PFREE(
1646 pmap_t pmap,
1647 int bytes)
1648 {
1649 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
1650 }
1651
1652 static inline void
1653 pmap_tt_ledger_credit(
1654 pmap_t pmap,
1655 vm_size_t size)
1656 {
1657 if (pmap != kernel_pmap) {
1658 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
1659 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
1660 }
1661 }
1662
1663 static inline void
1664 pmap_tt_ledger_debit(
1665 pmap_t pmap,
1666 vm_size_t size)
1667 {
1668 if (pmap != kernel_pmap) {
1669 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
1670 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
1671 }
1672 }
1673
1674 static unsigned int
1675 alloc_asid(
1676 void)
1677 {
1678 unsigned int asid_bitmap_index;
1679
1680 pmap_simple_lock(&pmaps_lock);
1681 for (asid_bitmap_index = 0; asid_bitmap_index < (MAX_ASID / (sizeof(uint32_t) * NBBY)); asid_bitmap_index++) {
1682 unsigned int temp = ffs(asid_bitmap[asid_bitmap_index]);
1683 if (temp > 0) {
1684 temp -= 1;
1685 asid_bitmap[asid_bitmap_index] &= ~(1 << temp);
1686 #if __ARM_KERNEL_PROTECT__
1687 /*
1688 * We need two ASIDs: n and (n | 1). n is used for EL0,
1689 * (n | 1) for EL1.
1690 */
1691 unsigned int temp2 = temp | 1;
1692 assert(temp2 < MAX_ASID);
1693 assert(temp2 < 32);
1694 assert(temp2 != temp);
1695 assert(asid_bitmap[asid_bitmap_index] & (1 << temp2));
1696
1697 /* Grab the second ASID. */
1698 asid_bitmap[asid_bitmap_index] &= ~(1 << temp2);
1699 #endif /* __ARM_KERNEL_PROTECT__ */
1700 pmap_simple_unlock(&pmaps_lock);
1701
1702 /*
1703 * We should never vend out physical ASID 0 through this
1704 * method, as it belongs to the kernel.
1705 */
1706 assert(((asid_bitmap_index * sizeof(uint32_t) * NBBY + temp) % ARM_MAX_ASID) != 0);
1707
1708 #if __ARM_KERNEL_PROTECT__
1709 /* Or the kernel EL1 ASID. */
1710 assert(((asid_bitmap_index * sizeof(uint32_t) * NBBY + temp) % ARM_MAX_ASID) != 1);
1711 #endif /* __ARM_KERNEL_PROTECT__ */
1712
1713 return asid_bitmap_index * sizeof(uint32_t) * NBBY + temp;
1714 }
1715 }
1716 pmap_simple_unlock(&pmaps_lock);
1717 /*
1718 * TODO: Add code to deal with a pmap that has no ASID; panic for now. Not
1719 * an issue given the small-config process hard limit.
1720 */
1721 panic("alloc_asid(): out of ASID number");
1722 return MAX_ASID;
1723 }
1724
1725 static void
1726 free_asid(
1727 int asid)
1728 {
1729 /* Don't free up any alias of physical ASID 0. */
1730 assert((asid % ARM_MAX_ASID) != 0);
1731
1732 pmap_simple_lock(&pmaps_lock);
1733 setbit(asid, (int *) asid_bitmap);
1734
1735 #if __ARM_KERNEL_PROTECT__
1736 assert((asid | 1) < MAX_ASID);
1737 assert((asid | 1) != asid);
1738 setbit(asid | 1, (int *) asid_bitmap);
1739 #endif /* __ARM_KERNEL_PROTECT__ */
1740
1741 pmap_simple_unlock(&pmaps_lock);
1742 }
1743
1744 #ifndef PMAP_PV_LOAD_FACTOR
1745 #define PMAP_PV_LOAD_FACTOR 1
1746 #endif
1747
1748 #define PV_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
1749 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
1750 #define PV_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
1751 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
1752 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
1753 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
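/*
 * Worked example (not in the original source): with PMAP_PV_LOAD_FACTOR == 1,
 * both low-water marks default to 0x200 (512) entries, each allocation chunk
 * is 512 entries, and the initial targets are 512 * 5 == 2560 general
 * pv_entry structures plus 512 kernel-reserved ones.  pv_alloc() below carves
 * whole pages into PAGE_SIZE / sizeof(pv_entry_t) entries at a time.
 */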
1754
1755 uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
1756 uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
1757 uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;
1758
1759 uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
1760 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
1761 uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
1762 uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;
1763
1764 thread_t mapping_replenish_thread;
1765 event_t mapping_replenish_event;
1766 event_t pmap_user_pv_throttle_event;
1767 volatile uint32_t mappingrecurse = 0;
1768
1769 uint64_t pmap_pv_throttle_stat;
1770 uint64_t pmap_pv_throttled_waiters;
1771
1772 unsigned pmap_mapping_thread_wakeups;
1773 unsigned pmap_kernel_reserve_replenish_stat MARK_AS_PMAP_DATA;
1774 unsigned pmap_user_reserve_replenish_stat MARK_AS_PMAP_DATA;
1775 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
1776
1777
1778 static void
1779 pv_init(
1780 void)
1781 {
1782 simple_lock_init(&pv_free_list_lock, 0);
1783 simple_lock_init(&pv_kern_free_list_lock, 0);
1784 pv_free_list = PV_ENTRY_NULL;
1785 pv_free_count = 0x0U;
1786 pv_kern_free_list = PV_ENTRY_NULL;
1787 pv_kern_free_count = 0x0U;
1788 }
1789
1790 static inline void PV_ALLOC(pv_entry_t **pv_ep);
1791 static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
1792 static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
1793 static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
1794
1795 static inline void pmap_pv_throttle(pmap_t p);
1796
1797 static boolean_t
1798 pv_alloc(
1799 pmap_t pmap,
1800 unsigned int pai,
1801 pv_entry_t **pvepp)
1802 {
1803 if (pmap != NULL) {
1804 PMAP_ASSERT_LOCKED(pmap);
1805 }
1806 ASSERT_PVH_LOCKED(pai);
1807 PV_ALLOC(pvepp);
1808 if (PV_ENTRY_NULL == *pvepp) {
1809 if ((pmap == NULL) || (kernel_pmap == pmap)) {
1810 PV_KERN_ALLOC(pvepp);
1811
1812 if (PV_ENTRY_NULL == *pvepp) {
1813 pv_entry_t *pv_e;
1814 pv_entry_t *pv_eh;
1815 pv_entry_t *pv_et;
1816 int pv_cnt;
1817 unsigned j;
1818 pmap_paddr_t pa;
1819 kern_return_t ret;
1820
1821 UNLOCK_PVH(pai);
1822 if (pmap != NULL) {
1823 PMAP_UNLOCK(pmap);
1824 }
1825
1826 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
1827
1828 if (ret == KERN_RESOURCE_SHORTAGE) {
1829 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
1830 }
1831
1832 if (ret != KERN_SUCCESS) {
1833 panic("%s: failed to alloc page for kernel, ret=%d, "
1834 "pmap=%p, pai=%u, pvepp=%p",
1835 __FUNCTION__, ret,
1836 pmap, pai, pvepp);
1837 }
1838
1839 pv_page_count++;
1840
1841 pv_e = (pv_entry_t *)phystokv(pa);
1842 pv_cnt = 0;
1843 pv_eh = pv_et = PV_ENTRY_NULL;
1844 *pvepp = pv_e;
1845 pv_e++;
1846
1847 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
1848 pv_e->pve_next = pv_eh;
1849 pv_eh = pv_e;
1850
1851 if (pv_et == PV_ENTRY_NULL) {
1852 pv_et = pv_e;
1853 }
1854 pv_cnt++;
1855 pv_e++;
1856 }
1857 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
1858 if (pmap != NULL) {
1859 PMAP_LOCK(pmap);
1860 }
1861 LOCK_PVH(pai);
1862 return FALSE;
1863 }
1864 } else {
1865 UNLOCK_PVH(pai);
1866 PMAP_UNLOCK(pmap);
1867 pmap_pv_throttle(pmap);
1868 {
1869 pv_entry_t *pv_e;
1870 pv_entry_t *pv_eh;
1871 pv_entry_t *pv_et;
1872 int pv_cnt;
1873 unsigned j;
1874 pmap_paddr_t pa;
1875 kern_return_t ret;
1876
1877 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
1878
1879 if (ret != KERN_SUCCESS) {
1880 panic("%s: failed to alloc page, ret=%d, "
1881 "pmap=%p, pai=%u, pvepp=%p",
1882 __FUNCTION__, ret,
1883 pmap, pai, pvepp);
1884 }
1885
1886 pv_page_count++;
1887
1888 pv_e = (pv_entry_t *)phystokv(pa);
1889 pv_cnt = 0;
1890 pv_eh = pv_et = PV_ENTRY_NULL;
1891 *pvepp = pv_e;
1892 pv_e++;
1893
1894 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
1895 pv_e->pve_next = pv_eh;
1896 pv_eh = pv_e;
1897
1898 if (pv_et == PV_ENTRY_NULL) {
1899 pv_et = pv_e;
1900 }
1901 pv_cnt++;
1902 pv_e++;
1903 }
1904 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
1905 }
1906 PMAP_LOCK(pmap);
1907 LOCK_PVH(pai);
1908 return FALSE;
1909 }
1910 }
1911 assert(PV_ENTRY_NULL != *pvepp);
1912 return TRUE;
1913 }
1914
1915 static void
1916 pv_free(
1917 pv_entry_t *pvep)
1918 {
1919 PV_FREE_LIST(pvep, pvep, 1);
1920 }
1921
1922 static void
1923 pv_list_free(
1924 pv_entry_t *pvehp,
1925 pv_entry_t *pvetp,
1926 unsigned int cnt)
1927 {
1928 PV_FREE_LIST(pvehp, pvetp, cnt);
1929 }
1930
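/*
 * Wake the mapping_replenish thread if either free list has dropped below
 * its low-water mark; mappingrecurse guards against redundant wakeups.
 */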
1931 static inline void
1932 pv_water_mark_check(void)
1933 {
1934 if ((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark)) {
1935 if (!mappingrecurse && hw_compare_and_store(0, 1, &mappingrecurse)) {
1936 thread_wakeup(&mapping_replenish_event);
1937 }
1938 }
1939 }
1940
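/*
 * Low-level free-list manipulators.  PV_ALLOC/PV_FREE_LIST operate on the
 * general free list, PV_KERN_ALLOC/PV_KERN_FREE_LIST on the kernel reserve;
 * each takes the corresponding simple lock for the duration of the update.
 */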
1941 static inline void
1942 PV_ALLOC(pv_entry_t **pv_ep)
1943 {
1944 assert(*pv_ep == PV_ENTRY_NULL);
1945 pmap_simple_lock(&pv_free_list_lock);
1946 /*
1947 * If the kernel reserved pool is low, let non-kernel mappings allocate
1948 * synchronously, possibly subject to a throttle.
1949 */
1950 if ((pv_kern_free_count >= pv_kern_low_water_mark) && ((*pv_ep = pv_free_list) != 0)) {
1951 pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
1952 (*pv_ep)->pve_next = PV_ENTRY_NULL;
1953 pv_free_count--;
1954 }
1955
1956 pmap_simple_unlock(&pv_free_list_lock);
1957 }
1958
1959 static inline void
1960 PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
1961 {
1962 pmap_simple_lock(&pv_free_list_lock);
1963 pv_et->pve_next = (pv_entry_t *)pv_free_list;
1964 pv_free_list = pv_eh;
1965 pv_free_count += pv_cnt;
1966 pmap_simple_unlock(&pv_free_list_lock);
1967 }
1968
1969 static inline void
1970 PV_KERN_ALLOC(pv_entry_t **pv_e)
1971 {
1972 assert(*pv_e == PV_ENTRY_NULL);
1973 pmap_simple_lock(&pv_kern_free_list_lock);
1974
1975 if ((*pv_e = pv_kern_free_list) != 0) {
1976 pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
1977 (*pv_e)->pve_next = PV_ENTRY_NULL;
1978 pv_kern_free_count--;
1979 pmap_kern_reserve_alloc_stat++;
1980 }
1981
1982 pmap_simple_unlock(&pv_kern_free_list_lock);
1983 }
1984
1985 static inline void
1986 PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
1987 {
1988 pmap_simple_lock(&pv_kern_free_list_lock);
1989 pv_et->pve_next = pv_kern_free_list;
1990 pv_kern_free_list = pv_eh;
1991 pv_kern_free_count += pv_cnt;
1992 pmap_simple_unlock(&pv_kern_free_list_lock);
1993 }
1994
1995 static inline void
1996 pmap_pv_throttle(__unused pmap_t p)
1997 {
1998 assert(p != kernel_pmap);
1999 /* Apply throttle on non-kernel mappings */
2000 if (pv_kern_free_count < (pv_kern_low_water_mark / 2)) {
2001 pmap_pv_throttle_stat++;
2002 /* This doesn't need to be strictly accurate, merely a hint
2003 * to eliminate the timeout when the reserve is replenished.
2004 */
2005 pmap_pv_throttled_waiters++;
2006 assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
2007 thread_block(THREAD_CONTINUE_NULL);
2008 }
2009 }
2010
2011 /*
2012 * Creates a target number of free pv_entry_t objects for the kernel free list
2013 * and the general free list.
2014 */
2015 MARK_AS_PMAP_TEXT static kern_return_t
2016 mapping_free_prime_internal(void)
2017 {
2018 unsigned j;
2019 pmap_paddr_t pa;
2020 kern_return_t ret;
2021 pv_entry_t *pv_e;
2022 pv_entry_t *pv_eh;
2023 pv_entry_t *pv_et;
2024 int pv_cnt;
2025 int alloc_options = 0;
2026 int needed_pv_cnt = 0;
2027 int target_pv_free_cnt = 0;
2028
2029 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
2030 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;
2031
2032 if (mapping_free_prime_internal_done) {
2033 return KERN_FAILURE;
2034 }
2035
2036 if (!mapping_free_prime_internal_called) {
2037 mapping_free_prime_internal_called = TRUE;
2038
2039 pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;
2040
2041 /* Alterable via sysctl */
2042 pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;
2043
2044 pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
2045 pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
2046 }
2047
2048 pv_cnt = 0;
2049 pv_eh = pv_et = PV_ENTRY_NULL;
2050 target_pv_free_cnt = PV_ALLOC_INITIAL_TARGET;
2051
2052 /*
2053 * We don't take the lock to read pv_free_count, as we should not be
2054 * invoking this from a multithreaded context.
2055 */
2056 needed_pv_cnt = target_pv_free_cnt - pv_free_count;
2057
2058 if (needed_pv_cnt > target_pv_free_cnt) {
2059 needed_pv_cnt = 0;
2060 }
2061
2062 while (pv_cnt < needed_pv_cnt) {
2063 ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
2064
2065 assert(ret == KERN_SUCCESS);
2066
2067 pv_page_count++;
2068
2069 pv_e = (pv_entry_t *)phystokv(pa);
2070
2071 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2072 pv_e->pve_next = pv_eh;
2073 pv_eh = pv_e;
2074
2075 if (pv_et == PV_ENTRY_NULL) {
2076 pv_et = pv_e;
2077 }
2078 pv_cnt++;
2079 pv_e++;
2080 }
2081 }
2082
2083 if (pv_cnt) {
2084 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2085 }
2086
2087 pv_cnt = 0;
2088 pv_eh = pv_et = PV_ENTRY_NULL;
2089 target_pv_free_cnt = PV_KERN_ALLOC_INITIAL_TARGET;
2090
2091 /*
2092 * We don't take the lock to read pv_kern_free_count, as we should not
2093 * be invoking this from a multithreaded context.
2094 */
2095 needed_pv_cnt = target_pv_free_cnt - pv_kern_free_count;
2096
2097 if (needed_pv_cnt > target_pv_free_cnt) {
2098 needed_pv_cnt = 0;
2099 }
2100
2101 while (pv_cnt < needed_pv_cnt) {
2102 ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
2103
2104 assert(ret == KERN_SUCCESS);
2105 pv_page_count++;
2106
2107 pv_e = (pv_entry_t *)phystokv(pa);
2108
2109 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2110 pv_e->pve_next = pv_eh;
2111 pv_eh = pv_e;
2112
2113 if (pv_et == PV_ENTRY_NULL) {
2114 pv_et = pv_e;
2115 }
2116 pv_cnt++;
2117 pv_e++;
2118 }
2119 }
2120
2121 if (pv_cnt) {
2122 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2123 }
2124
2125 mapping_free_prime_internal_done = TRUE;
2126 return KERN_SUCCESS;
2127 }
2128
2129 void
2130 mapping_free_prime(void)
2131 {
2132 kern_return_t kr = KERN_FAILURE;
2133
2134 kr = mapping_free_prime_internal();
2135
2136 if (kr != KERN_SUCCESS) {
2137 panic("%s: failed, kr=%d", __FUNCTION__, kr);
2138 }
2139 }
2140
2141 void mapping_replenish(void);
2142
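/*
 * Spawn the mapping_replenish worker thread that keeps the PV free lists
 * topped up.
 */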
2143 void
2144 mapping_adjust(void)
2145 {
2146 kern_return_t mres;
2147
2148 mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
2149 if (mres != KERN_SUCCESS) {
2150 panic("pmap: mapping_replenish thread creation failed");
2151 }
2152 thread_deallocate(mapping_replenish_thread);
2153 }
2154
2155 /*
2156 * Fills the kernel and general PV free lists back up to their low watermarks.
2157 */
2158 MARK_AS_PMAP_TEXT static kern_return_t
2159 mapping_replenish_internal(void)
2160 {
2161 pv_entry_t *pv_e;
2162 pv_entry_t *pv_eh;
2163 pv_entry_t *pv_et;
2164 int pv_cnt;
2165 unsigned j;
2166 pmap_paddr_t pa;
2167 kern_return_t ret = KERN_SUCCESS;
2168
2169 while (pv_kern_free_count < pv_kern_low_water_mark) {
2170 pv_cnt = 0;
2171 pv_eh = pv_et = PV_ENTRY_NULL;
2172
2173 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2174 assert(ret == KERN_SUCCESS);
2175
2176 pv_page_count++;
2177
2178 pv_e = (pv_entry_t *)phystokv(pa);
2179
2180 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2181 pv_e->pve_next = pv_eh;
2182 pv_eh = pv_e;
2183
2184 if (pv_et == PV_ENTRY_NULL) {
2185 pv_et = pv_e;
2186 }
2187 pv_cnt++;
2188 pv_e++;
2189 }
2190 pmap_kernel_reserve_replenish_stat += pv_cnt;
2191 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2192 }
2193
2194 while (pv_free_count < pv_low_water_mark) {
2195 pv_cnt = 0;
2196 pv_eh = pv_et = PV_ENTRY_NULL;
2197
2198 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2199 assert(ret == KERN_SUCCESS);
2200
2201 pv_page_count++;
2202
2203 pv_e = (pv_entry_t *)phystokv(pa);
2204
2205 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2206 pv_e->pve_next = pv_eh;
2207 pv_eh = pv_e;
2208
2209 if (pv_et == PV_ENTRY_NULL) {
2210 pv_et = pv_e;
2211 }
2212 pv_cnt++;
2213 pv_e++;
2214 }
2215 pmap_user_reserve_replenish_stat += pv_cnt;
2216 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2217 }
2218
2219 return ret;
2220 }
2221
2222 /*
2223 * Continuation function that keeps the PV free lists from running out of free
2224 * elements.
2225 */
2226 __attribute__((noreturn))
2227 void
2228 mapping_replenish(void)
2229 {
2230 kern_return_t kr;
2231
2232 /* We qualify for VM privileges... */
2233 current_thread()->options |= TH_OPT_VMPRIV;
2234
2235 for (;;) {
2236 kr = mapping_replenish_internal();
2237
2238 if (kr != KERN_SUCCESS) {
2239 panic("%s: failed, kr=%d", __FUNCTION__, kr);
2240 }
2241
2242 /*
2243 * Wake threads throttled while the kernel reserve was being replenished.
2244 */
2245 if (pmap_pv_throttled_waiters) {
2246 pmap_pv_throttled_waiters = 0;
2247 thread_wakeup(&pmap_user_pv_throttle_event);
2248 }
2249
2250 /* Check if the kernel pool has been depleted since the
2251 * first pass, to reduce refill latency.
2252 */
2253 if (pv_kern_free_count < pv_kern_low_water_mark) {
2254 continue;
2255 }
2256 /* Block sans continuation to avoid yielding kernel stack */
2257 assert_wait(&mapping_replenish_event, THREAD_UNINT);
2258 mappingrecurse = 0;
2259 thread_block(THREAD_CONTINUE_NULL);
2260 pmap_mapping_thread_wakeups++;
2261 }
2262 }
2263
2264
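/*
 * Seed the page table descriptor (pt_desc_t) free list from a statically
 * allocated block during early boot, and clear ptd_preboot so that later
 * refills go through pmap_pages_alloc().
 */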
2265 static void
2266 ptd_bootstrap(
2267 pt_desc_t *ptdp,
2268 unsigned int ptd_cnt)
2269 {
2270 simple_lock_init(&ptd_free_list_lock, 0);
2271 while (ptd_cnt != 0) {
2272 (*(void **)ptdp) = (void *)ptd_free_list;
2273 ptd_free_list = ptdp;
2274 ptdp++;
2275 ptd_cnt--;
2276 ptd_free_count++;
2277 }
2278 ptd_preboot = FALSE;
2279 }
2280
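/*
 * Pop a pt_desc_t from the free list, refilling the list one page at a time
 * when it runs dry (carved from avail_start before ptd_bootstrap(), from
 * pmap_pages_alloc() afterwards).  Returns NULL if the backing page
 * allocation fails and 'reclaim' is false.  The descriptor comes back
 * unlinked, i.e. not yet on pt_page_list and with no owning pmap.
 */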
2281 static pt_desc_t*
2282 ptd_alloc_unlinked(bool reclaim)
2283 {
2284 pt_desc_t *ptdp;
2285 unsigned i;
2286
2287 if (!ptd_preboot) {
2288 pmap_simple_lock(&ptd_free_list_lock);
2289 }
2290
2291 if (ptd_free_count == 0) {
2292 unsigned int ptd_cnt;
2293 pt_desc_t *ptdp_next;
2294
2295 if (ptd_preboot) {
2296 ptdp = (pt_desc_t *)avail_start;
2297 avail_start += ARM_PGBYTES;
2298 ptdp_next = ptdp;
2299 ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
2300 } else {
2301 pmap_paddr_t pa;
2302 kern_return_t ret;
2303
2304 pmap_simple_unlock(&ptd_free_list_lock);
2305
2306 if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
2307 if (reclaim) {
2308 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
2309 assert(ret == KERN_SUCCESS);
2310 } else {
2311 return NULL;
2312 }
2313 }
2314 ptdp = (pt_desc_t *)phystokv(pa);
2315
2316 pmap_simple_lock(&ptd_free_list_lock);
2317 ptdp_next = ptdp;
2318 ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
2319 }
2320
2321 while (ptd_cnt != 0) {
2322 (*(void **)ptdp_next) = (void *)ptd_free_list;
2323 ptd_free_list = ptdp_next;
2324 ptdp_next++;
2325 ptd_cnt--;
2326 ptd_free_count++;
2327 }
2328 }
2329
2330 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
2331 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
2332 ptd_free_count--;
2333 } else {
2334 panic("out of ptd entry\n");
2335 }
2336
2337 if (!ptd_preboot) {
2338 pmap_simple_unlock(&ptd_free_list_lock);
2339 }
2340
2341 ptdp->pt_page.next = NULL;
2342 ptdp->pt_page.prev = NULL;
2343 ptdp->pmap = NULL;
2344
2345 for (i = 0; i < PT_INDEX_MAX; i++) {
2346 ptdp->pt_map[i].va = (vm_offset_t)-1;
2347 ptdp->pt_cnt[i].refcnt = 0;
2348 ptdp->pt_cnt[i].wiredcnt = 0;
2349 }
2350
2351 return ptdp;
2352 }
2353
2354 static inline pt_desc_t*
2355 ptd_alloc(pmap_t pmap, bool reclaim)
2356 {
2357 pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);
2358
2359 if (ptdp == NULL) {
2360 return NULL;
2361 }
2362
2363 ptdp->pmap = pmap;
2364 if (pmap != kernel_pmap) {
2365 /* We should never try to reclaim kernel pagetable pages in
2366 * pmap_pages_reclaim(), so don't enter them into the list. */
2367 pmap_simple_lock(&pt_pages_lock);
2368 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2369 pmap_simple_unlock(&pt_pages_lock);
2370 }
2371
2372 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
2373 return ptdp;
2374 }
2375
2376 static void
2377 ptd_deallocate(pt_desc_t *ptdp)
2378 {
2379 pmap_t pmap = ptdp->pmap;
2380
2381 if (ptd_preboot) {
2382 panic("ptd_deallocate(): early boot\n");
2383 }
2384
2385 if (ptdp->pt_page.next != NULL) {
2386 pmap_simple_lock(&pt_pages_lock);
2387 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2388 pmap_simple_unlock(&pt_pages_lock);
2389 }
2390 pmap_simple_lock(&ptd_free_list_lock);
2391 (*(void **)ptdp) = (void *)ptd_free_list;
2392 ptd_free_list = (pt_desc_t *)ptdp;
2393 ptd_free_count++;
2394 pmap_simple_unlock(&ptd_free_list_lock);
2395 if (pmap != NULL) {
2396 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
2397 }
2398 }
2399
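/*
 * Record, for the page table containing pte_p, the base VA covered by that
 * table and prime its refcount when it is a non-leaf level.
 */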
2400 static void
2401 ptd_init(
2402 pt_desc_t *ptdp,
2403 pmap_t pmap,
2404 vm_map_address_t va,
2405 unsigned int level,
2406 pt_entry_t *pte_p)
2407 {
2408 if (ptdp->pmap != pmap) {
2409 panic("ptd_init(): pmap mismatch\n");
2410 }
2411
2412 #if (__ARM_VMSA__ == 7)
2413 assert(level == 2);
2414 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
2415 #else
2416 if (level == 3) {
2417 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L2_OFFMASK;
2418 } else if (level == 2) {
2419 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L1_OFFMASK;
2420 }
2421 #endif
2422 if (level < PMAP_TT_MAX_LEVEL) {
2423 ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
2424 }
2425 }
2426
2427
2428 boolean_t
2429 pmap_valid_address(
2430 pmap_paddr_t addr)
2431 {
2432 return pa_valid(addr);
2433 }
2434
2435 #if (__ARM_VMSA__ == 7)
2436
2437 /*
2438 * Given an offset and a map, compute the address of the
2439 * corresponding translation table entry.
2440 */
2441 static inline tt_entry_t *
2442 pmap_tte(pmap_t pmap,
2443 vm_map_address_t addr)
2444 {
2445 if (!(tte_index(pmap, addr) < pmap->tte_index_max)) {
2446 return (tt_entry_t *)NULL;
2447 }
2448 return &pmap->tte[tte_index(pmap, addr)];
2449 }
2450
2451
2452 /*
2453 * Given an offset and a map, compute the address of the
2454 * pte. If the address is invalid with respect to the map
2455 * then PT_ENTRY_NULL is returned (and the map may need to grow).
2456 *
2457 * This is only used internally.
2458 */
2459 static inline pt_entry_t *
2460 pmap_pte(
2461 pmap_t pmap,
2462 vm_map_address_t addr)
2463 {
2464 pt_entry_t *ptp;
2465 tt_entry_t *ttp;
2466 tt_entry_t tte;
2467
2468 ttp = pmap_tte(pmap, addr);
2469 if (ttp == (tt_entry_t *)NULL) {
2470 return PT_ENTRY_NULL;
2471 }
2472 tte = *ttp;
2473 #if MACH_ASSERT
2474 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
2475 panic("Attempt to demote L1 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2476 }
2477 #endif
2478 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
2479 return PT_ENTRY_NULL;
2480 }
2481 ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
2482 return ptp;
2483 }
2484
2485 #else
2486
2487 /*
2488 * Given an offset and a map, compute the address of the level 1 translation table entry.
2489 * If the translation is invalid then PT_ENTRY_NULL is returned.
2490 */
2491 static inline tt_entry_t *
2492 pmap_tt1e(pmap_t pmap,
2493 vm_map_address_t addr)
2494 {
2495 /* Level 0 currently unused */
2496 #if __ARM64_TWO_LEVEL_PMAP__
2497 #pragma unused(pmap, addr)
2498 panic("pmap_tt1e called on a two level pmap");
2499 return NULL;
2500 #else
2501 return &pmap->tte[tt1_index(pmap, addr)];
2502 #endif
2503 }
2504
2505 /*
2506 * Given an offset and a map, compute the address of the level 2 translation table entry.
2507 * If the translation is invalid then PT_ENTRY_NULL is returned.
2508 */
2509 static inline tt_entry_t *
2510 pmap_tt2e(pmap_t pmap,
2511 vm_map_address_t addr)
2512 {
2513 #if __ARM64_TWO_LEVEL_PMAP__
2514 return &pmap->tte[tt2_index(pmap, addr)];
2515 #else
2516 tt_entry_t *ttp;
2517 tt_entry_t tte;
2518
2519 ttp = pmap_tt1e(pmap, addr);
2520 tte = *ttp;
2521 #if MACH_ASSERT
2522 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
2523 panic("Attempt to demote L1 block (?!): pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2524 }
2525 #endif
2526 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
2527 return PT_ENTRY_NULL;
2528 }
2529
2530 ttp = &((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, addr)];
2531 return (tt_entry_t *)ttp;
2532 #endif
2533 }
2534
2535
2536 /*
2537 * Given an offset and a map, compute the address of the level 3 translation table entry.
2538 * If the translation is invalid then PT_ENTRY_NULL is returned.
2539 */
2540 static inline pt_entry_t *
2541 pmap_tt3e(
2542 pmap_t pmap,
2543 vm_map_address_t addr)
2544 {
2545 pt_entry_t *ptp;
2546 tt_entry_t *ttp;
2547 tt_entry_t tte;
2548
2549 ttp = pmap_tt2e(pmap, addr);
2550 if (ttp == PT_ENTRY_NULL) {
2551 return PT_ENTRY_NULL;
2552 }
2553
2554 tte = *ttp;
2555
2556 #if MACH_ASSERT
2557 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
2558 panic("Attempt to demote L2 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2559 }
2560 #endif
2561 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
2562 return PT_ENTRY_NULL;
2563 }
2564
2565 /* Get third-level (4KB) entry */
2566 ptp = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, addr)]);
2567 return ptp;
2568 }
2569
2570
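/*
 * On ARM64 (__ARM_VMSA__ > 7) the generic pmap_tte()/pmap_pte() helpers
 * resolve to the L2 table entry and the L3 page entry, respectively.
 */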
2571 static inline tt_entry_t *
2572 pmap_tte(
2573 pmap_t pmap,
2574 vm_map_address_t addr)
2575 {
2576 return pmap_tt2e(pmap, addr);
2577 }
2578
2579
2580 static inline pt_entry_t *
2581 pmap_pte(
2582 pmap_t pmap,
2583 vm_map_address_t addr)
2584 {
2585 return pmap_tt3e(pmap, addr);
2586 }
2587
2588 #endif
2589
2590
2591 /*
2592 * Map memory at initialization. The physical addresses being
2593 * mapped are not managed and are never unmapped.
2594 *
2595 * For now, the VM is already on; we only need to map the
2596 * specified memory.
2597 */
2598 vm_map_address_t
2599 pmap_map(
2600 vm_map_address_t virt,
2601 vm_offset_t start,
2602 vm_offset_t end,
2603 vm_prot_t prot,
2604 unsigned int flags)
2605 {
2606 kern_return_t kr;
2607 vm_size_t ps;
2608
2609 ps = PAGE_SIZE;
2610 while (start < end) {
2611 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
2612 prot, VM_PROT_NONE, flags, FALSE);
2613
2614 if (kr != KERN_SUCCESS) {
2615 panic("%s: failed pmap_enter, "
2616 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
2617 __FUNCTION__,
2618 (void *) virt, (void *) start, (void *) end, prot, flags);
2619 }
2620
2621 virt += ps;
2622 start += ps;
2623 }
2624 return virt;
2625 }
2626
2627 vm_map_address_t
2628 pmap_map_bd_with_options(
2629 vm_map_address_t virt,
2630 vm_offset_t start,
2631 vm_offset_t end,
2632 vm_prot_t prot,
2633 int32_t options)
2634 {
2635 pt_entry_t tmplate;
2636 pt_entry_t *ptep;
2637 vm_map_address_t vaddr;
2638 vm_offset_t paddr;
2639 pt_entry_t mem_attr;
2640
2641 switch (options & PMAP_MAP_BD_MASK) {
2642 case PMAP_MAP_BD_WCOMB:
2643 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
2644 #if (__ARM_VMSA__ > 7)
2645 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
2646 #else
2647 mem_attr |= ARM_PTE_SH;
2648 #endif
2649 break;
2650 case PMAP_MAP_BD_POSTED:
2651 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
2652 break;
2653 default:
2654 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
2655 break;
2656 }
2657
2658 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
2659 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
2660 #if __ARM_KERNEL_PROTECT__
2661 tmplate |= ARM_PTE_NG;
2662 #endif /* __ARM_KERNEL_PROTECT__ */
2663
2664 vaddr = virt;
2665 paddr = start;
2666 while (paddr < end) {
2667 ptep = pmap_pte(kernel_pmap, vaddr);
2668 if (ptep == PT_ENTRY_NULL) {
2669 panic("pmap_map_bd");
2670 }
2671 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2672 WRITE_PTE_STRONG(ptep, tmplate);
2673
2674 pte_increment_pa(tmplate);
2675 vaddr += PAGE_SIZE;
2676 paddr += PAGE_SIZE;
2677 }
2678
2679 if (end >= start) {
2680 flush_mmu_tlb_region(virt, (unsigned)(end - start));
2681 }
2682
2683 return vaddr;
2684 }
2685
2686 /*
2687 * Back-door routine for mapping kernel VM at initialization.
2688 * Useful for mapping memory outside the range
2689 * [vm_first_phys, vm_last_phys] (i.e., devices).
2690 * Otherwise like pmap_map.
2691 */
2692 vm_map_address_t
2693 pmap_map_bd(
2694 vm_map_address_t virt,
2695 vm_offset_t start,
2696 vm_offset_t end,
2697 vm_prot_t prot)
2698 {
2699 pt_entry_t tmplate;
2700 pt_entry_t *ptep;
2701 vm_map_address_t vaddr;
2702 vm_offset_t paddr;
2703
2704 /* not cacheable and not buffered */
2705 tmplate = pa_to_pte(start)
2706 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
2707 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
2708 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
2709 #if __ARM_KERNEL_PROTECT__
2710 tmplate |= ARM_PTE_NG;
2711 #endif /* __ARM_KERNEL_PROTECT__ */
2712
2713 vaddr = virt;
2714 paddr = start;
2715 while (paddr < end) {
2716 ptep = pmap_pte(kernel_pmap, vaddr);
2717 if (ptep == PT_ENTRY_NULL) {
2718 panic("pmap_map_bd");
2719 }
2720 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2721 WRITE_PTE_STRONG(ptep, tmplate);
2722
2723 pte_increment_pa(tmplate);
2724 vaddr += PAGE_SIZE;
2725 paddr += PAGE_SIZE;
2726 }
2727
2728 if (end >= start) {
2729 flush_mmu_tlb_region(virt, (unsigned)(end - start));
2730 }
2731
2732 return vaddr;
2733 }
2734
2735 /*
2736 * Back-door routine for mapping kernel VM at initialization.
2737 * Useful for mapping memory specific physical addresses in early
2738 * boot (i.e., before kernel_map is initialized).
2739 *
2740 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
2741 */
2742
2743 vm_map_address_t
2744 pmap_map_high_window_bd(
2745 vm_offset_t pa_start,
2746 vm_size_t len,
2747 vm_prot_t prot)
2748 {
2749 pt_entry_t *ptep, pte;
2750 #if (__ARM_VMSA__ == 7)
2751 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
2752 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
2753 #else
2754 vm_map_address_t va_start = VREGION1_START;
2755 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
2756 #endif
2757 vm_map_address_t va_end;
2758 vm_map_address_t va;
2759 vm_size_t offset;
2760
2761 offset = pa_start & PAGE_MASK;
2762 pa_start -= offset;
2763 len += offset;
2764
2765 if (len > (va_max - va_start)) {
2766 panic("pmap_map_high_window_bd: area too large\n");
2767 }
2768
2769 scan:
2770 for (; va_start < va_max; va_start += PAGE_SIZE) {
2771 ptep = pmap_pte(kernel_pmap, va_start);
2772 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2773 if (*ptep == ARM_PTE_TYPE_FAULT) {
2774 break;
2775 }
2776 }
2777 if (va_start > va_max) {
2778 panic("pmap_map_high_window_bd: insufficient pages\n");
2779 }
2780
2781 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
2782 ptep = pmap_pte(kernel_pmap, va_end);
2783 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2784 if (*ptep != ARM_PTE_TYPE_FAULT) {
2785 va_start = va_end + PAGE_SIZE;
2786 goto scan;
2787 }
2788 }
2789
2790 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
2791 ptep = pmap_pte(kernel_pmap, va);
2792 pte = pa_to_pte(pa_start)
2793 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
2794 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
2795 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
2796 #if (__ARM_VMSA__ > 7)
2797 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
2798 #else
2799 pte |= ARM_PTE_SH;
2800 #endif
2801 #if __ARM_KERNEL_PROTECT__
2802 pte |= ARM_PTE_NG;
2803 #endif /* __ARM_KERNEL_PROTECT__ */
2804 WRITE_PTE_STRONG(ptep, pte);
2805 }
2806 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len);
2807 #if KASAN
2808 kasan_notify_address(va_start, len);
2809 #endif
2810 return va_start;
2811 }
2812
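/*
 * Round addr up to the given power-of-two alignment,
 * e.g. PMAP_ALIGN(0x1003, 8) == 0x1008.
 */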
2813 #define PMAP_ALIGN(addr, align) (((addr) + ((align) - 1)) & ~((align) - 1))
2814
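/*
 * Parse the "pmap-io-ranges" property under /defaults in the device tree,
 * sanity-check each range for page alignment, overflow, and overlap with
 * managed physical memory, record the overall I/O bounds, and return the
 * number of bytes needed for io_attr_table.
 */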
2815 static vm_size_t
2816 pmap_compute_io_rgns(void)
2817 {
2818 DTEntry entry;
2819 pmap_io_range_t *ranges;
2820 uint64_t rgn_end;
2821 void *prop = NULL;
2822 int err;
2823 unsigned int prop_size;
2824
2825 err = DTLookupEntry(NULL, "/defaults", &entry);
2826 assert(err == kSuccess);
2827
2828 if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
2829 return 0;
2830 }
2831
2832 ranges = prop;
2833 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
2834 if (ranges[i].addr & PAGE_MASK) {
2835 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
2836 }
2837 if (ranges[i].len & PAGE_MASK) {
2838 panic("pmap I/O region %u length 0x%x is not page-aligned", i, ranges[i].len);
2839 }
2840 if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
2841 panic("pmap I/O region %u addr 0x%llx length 0x%x wraps around", i, ranges[i].addr, ranges[i].len);
2842 }
2843 if ((i == 0) || (ranges[i].addr < io_rgn_start)) {
2844 io_rgn_start = ranges[i].addr;
2845 }
2846 if ((i == 0) || (rgn_end > io_rgn_end)) {
2847 io_rgn_end = rgn_end;
2848 }
2849 ++num_io_rgns;
2850 }
2851
2852 if (io_rgn_start & PAGE_MASK) {
2853 panic("pmap I/O region start is not page-aligned!\n");
2854 }
2855
2856 if (io_rgn_end & PAGE_MASK) {
2857 panic("pmap I/O region end is not page-aligned!\n");
2858 }
2859
2860 if (((io_rgn_start <= gPhysBase) && (io_rgn_end > gPhysBase)) ||
2861 ((io_rgn_start < avail_end) && (io_rgn_end >= avail_end)) ||
2862 ((io_rgn_start > gPhysBase) && (io_rgn_end < avail_end))) {
2863 panic("pmap I/O region overlaps physical memory!\n");
2864 }
2865
2866 return num_io_rgns * sizeof(*ranges);
2867 }
2868
2869 /*
2870 * return < 0 for a < b
2871 * 0 for a == b
2872 * > 0 for a > b
2873 */
2874 typedef int (*cmpfunc_t)(const void *a, const void *b);
2875
2876 extern void
2877 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
2878
2879 static int
2880 cmp_io_rgns(const void *a, const void *b)
2881 {
2882 const pmap_io_range_t *range_a = a;
2883 const pmap_io_range_t *range_b = b;
2884 if ((range_b->addr + range_b->len) <= range_a->addr) {
2885 return 1;
2886 } else if ((range_a->addr + range_a->len) <= range_b->addr) {
2887 return -1;
2888 } else {
2889 return 0;
2890 }
2891 }
2892
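/*
 * Copy the device-tree I/O ranges into io_attr_table and sort them by
 * address using cmp_io_rgns().
 */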
2893 static void
2894 pmap_load_io_rgns(void)
2895 {
2896 DTEntry entry;
2897 pmap_io_range_t *ranges;
2898 void *prop = NULL;
2899 int err;
2900 unsigned int prop_size;
2901
2902 if (num_io_rgns == 0) {
2903 return;
2904 }
2905
2906 err = DTLookupEntry(NULL, "/defaults", &entry);
2907 assert(err == kSuccess);
2908
2909 err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
2910 assert(err == kSuccess);
2911
2912 ranges = prop;
2913 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
2914 io_attr_table[i] = ranges[i];
2915 }
2916
2917 qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
2918 }
2919
2920 #if __arm64__
2921 /*
2922 * pmap_get_arm64_prot
2923 *
2924 * Return the effective ARMv8 VMSA block protections, including
2925 * table AP/PXN/XN overrides, for the given pmap entry.
2926 *
2927 */
2928
2929 uint64_t
2930 pmap_get_arm64_prot(
2931 pmap_t pmap,
2932 vm_offset_t addr)
2933 {
2934 uint64_t tte;
2935 uint64_t tt_type, table_ap, table_xn, table_pxn;
2936 uint64_t prot = 0;
2937
2938 tte = *pmap_tt1e(pmap, addr);
2939
2940 if (!(tte & ARM_TTE_VALID)) {
2941 return 0;
2942 }
2943
2944 tt_type = tte & ARM_TTE_TYPE_MASK;
2945
2946 if (tt_type == ARM_TTE_TYPE_BLOCK) {
2947 return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
2948 }
2949
2950 table_ap = (tte >> ARM_TTE_TABLE_APSHIFT) & 0x3;
2951 table_xn = tte & ARM_TTE_TABLE_XN;
2952 table_pxn = tte & ARM_TTE_TABLE_PXN;
2953
2954 prot |= (table_ap << ARM_TTE_BLOCK_APSHIFT) | (table_xn ? ARM_TTE_BLOCK_NX : 0) | (table_pxn ? ARM_TTE_BLOCK_PNX : 0);
2955
2956 tte = *pmap_tt2e(pmap, addr);
2957 if (!(tte & ARM_TTE_VALID)) {
2958 return 0;
2959 }
2960
2961 tt_type = tte & ARM_TTE_TYPE_MASK;
2962
2963 if (tt_type == ARM_TTE_TYPE_BLOCK) {
2964 return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
2965 }
2966
2967 table_ap = (tte >> ARM_TTE_TABLE_APSHIFT) & 0x3;
2968 table_xn = tte & ARM_TTE_TABLE_XN;
2969 table_pxn = tte & ARM_TTE_TABLE_PXN;
2970
2971 prot |= (table_ap << ARM_TTE_BLOCK_APSHIFT) | (table_xn ? ARM_TTE_BLOCK_NX : 0) | (table_pxn ? ARM_TTE_BLOCK_PNX : 0);
2972
2973 tte = *pmap_tt3e(pmap, addr);
2974 if (!(tte & ARM_TTE_VALID)) {
2975 return 0;
2976 }
2977
2978 return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
2979 }
2980 #endif /* __arm64__ */
2981
2982
2983 /*
2984 * Bootstrap the system enough to run with virtual memory.
2985 *
2986 * The early VM initialization code has already allocated
2987 * the first CPU's translation table and made entries for
2988 * all the one-to-one mappings to be found there.
2989 *
2990 * We must set up the kernel pmap structures, the
2991 * physical-to-virtual translation lookup tables for the
2992 * physical memory to be managed (between avail_start and
2993 * avail_end).
2994 *
2995 * Map the kernel's code and data, and allocate the system page table.
2996 * Page_size must already be set.
2997 *
2998 * Parameters:
2999 * first_avail first available physical page -
3000 * after kernel page tables
3001 * avail_start PA of first managed physical page
3002 * avail_end PA of last managed physical page
3003 */
3004
3005 void
3006 pmap_bootstrap(
3007 vm_offset_t vstart)
3008 {
3009 pmap_paddr_t pmap_struct_start;
3010 vm_size_t pv_head_size;
3011 vm_size_t ptd_root_table_size;
3012 vm_size_t pp_attr_table_size;
3013 vm_size_t io_attr_table_size;
3014 unsigned int npages;
3015 unsigned int i;
3016 vm_map_offset_t maxoffset;
3017
3018 lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
3019
3020
3021 #if DEVELOPMENT || DEBUG
3022 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
3023 kprintf("Kernel traces for pmap operations enabled\n");
3024 }
3025 #endif
3026
3027 /*
3028 * Initialize the kernel pmap.
3029 */
3030 pmap_stamp = 1;
3031 kernel_pmap->tte = cpu_tte;
3032 kernel_pmap->ttep = cpu_ttep;
3033 #if (__ARM_VMSA__ > 7)
3034 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
3035 #else
3036 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
3037 #endif
3038 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
3039 kernel_pmap->ref_count = 1;
3040 kernel_pmap->gc_status = 0;
3041 kernel_pmap->nx_enabled = TRUE;
3042 #ifdef __arm64__
3043 kernel_pmap->is_64bit = TRUE;
3044 #else
3045 kernel_pmap->is_64bit = FALSE;
3046 #endif
3047 kernel_pmap->stamp = hw_atomic_add(&pmap_stamp, 1);
3048
3049 kernel_pmap->nested_region_grand_addr = 0x0ULL;
3050 kernel_pmap->nested_region_subord_addr = 0x0ULL;
3051 kernel_pmap->nested_region_size = 0x0ULL;
3052 kernel_pmap->nested_region_asid_bitmap = NULL;
3053 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
3054
3055 #if (__ARM_VMSA__ == 7)
3056 kernel_pmap->tte_index_max = 4 * NTTES;
3057 #else
3058 kernel_pmap->tte_index_max = (ARM_PGBYTES / sizeof(tt_entry_t));
3059 #endif
3060 kernel_pmap->prev_tte = (tt_entry_t *) NULL;
3061
3062 PMAP_LOCK_INIT(kernel_pmap);
3063 #if (__ARM_VMSA__ == 7)
3064 simple_lock_init(&kernel_pmap->tt1_lock, 0);
3065 kernel_pmap->cpu_ref = 0;
3066 #endif
3067 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
3068
3069 /* allocate space for and initialize the bookkeeping structures */
3070 io_attr_table_size = pmap_compute_io_rgns();
3071 npages = (unsigned int)atop(mem_size);
3072 pp_attr_table_size = npages * sizeof(pp_attr_t);
3073 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
3074 // allocate enough initial PTDs to map twice the available physical memory
3075 ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
3076
3077 pmap_struct_start = avail_start;
3078
3079 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
3080 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
3081 io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
3082 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t*));
3083 pv_head_table = (pv_entry_t **) phystokv(avail_start);
3084 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
3085 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
3086 avail_start = round_page(avail_start + ptd_root_table_size);
3087
3088 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
3089
3090 pmap_load_io_rgns();
3091 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
3092
3093 pmap_cpu_data_array_init();
3094
3095 vm_first_phys = gPhysBase;
3096 vm_last_phys = trunc_page(avail_end);
3097
3098 simple_lock_init(&pmaps_lock, 0);
3099 queue_init(&map_pmap_list);
3100 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
3101 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
3102 free_page_size_tt_count = 0;
3103 free_page_size_tt_max = 0;
3104 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
3105 free_two_page_size_tt_count = 0;
3106 free_two_page_size_tt_max = 0;
3107 free_tt_list = TT_FREE_ENTRY_NULL;
3108 free_tt_count = 0;
3109 free_tt_max = 0;
3110
3111 simple_lock_init(&pt_pages_lock, 0);
3112 queue_init(&pt_page_list);
3113
3114 simple_lock_init(&pmap_pages_lock, 0);
3115 pmap_pages_request_count = 0;
3116 pmap_pages_request_acum = 0;
3117 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
3118
3119 virtual_space_start = vstart;
3120 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
3121
3122 /* mark all the address spaces in use */
3123 for (i = 0; i < MAX_ASID / (sizeof(uint32_t) * NBBY); i++) {
3124 asid_bitmap[i] = 0xffffffff;
3125 }
3126
3127 /*
3128 * The kernel gets ASID 0, and all aliases of it. This is
3129 * important because ASID 0 is global; if we vend ASID 0
3130 * out to a user pmap, those translations will show up in
3131 * other processes through the TLB.
3132 */
3133 for (i = 0; i < MAX_ASID; i += ARM_MAX_ASID) {
3134 asid_bitmap[i / (sizeof(uint32_t) * NBBY)] &= ~(1 << (i % (sizeof(uint32_t) * NBBY)));
3135
3136 #if __ARM_KERNEL_PROTECT__
3137 assert((i + 1) < MAX_ASID);
3138 asid_bitmap[(i + 1) / (sizeof(uint32_t) * NBBY)] &= ~(1 << ((i + 1) % (sizeof(uint32_t) * NBBY)));
3139 #endif /* __ARM_KERNEL_PROTECT__ */
3140 }
3141
3142 kernel_pmap->asid = 0;
3143 kernel_pmap->vasid = 0;
3144
3145
3146 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
3147 maxoffset = trunc_page(maxoffset);
3148 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
3149 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
3150 arm_pmap_max_offset_default = maxoffset;
3151 }
3152 }
3153 #if defined(__arm64__)
3154 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
3155 maxoffset = trunc_page(maxoffset);
3156 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
3157 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
3158 arm64_pmap_max_offset_default = maxoffset;
3159 }
3160 }
3161 #endif
3162
3163 #if DEVELOPMENT || DEBUG
3164 PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
3165 #endif /* DEVELOPMENT || DEBUG */
3166
3167 pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
3168 pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;
3169
3170 simple_lock_init(&phys_backup_lock, 0);
3171
3172
3173 #if MACH_ASSERT
3174 PE_parse_boot_argn("pmap_stats_assert",
3175 &pmap_stats_assert,
3176 sizeof(pmap_stats_assert));
3177 PE_parse_boot_argn("vm_footprint_suspend_allowed",
3178 &vm_footprint_suspend_allowed,
3179 sizeof(vm_footprint_suspend_allowed));
3180 #endif /* MACH_ASSERT */
3181
3182 #if KASAN
3183 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
3184 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
3185 #endif /* KASAN */
3186 }
3187
3188
3189 void
3190 pmap_virtual_space(
3191 vm_offset_t *startp,
3192 vm_offset_t *endp
3193 )
3194 {
3195 *startp = virtual_space_start;
3196 *endp = virtual_space_end;
3197 }
3198
3199
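/*
 * Enumerate the virtual address regions reserved by the pmap layer during
 * bootstrap.  region_select indexes the kernel static mapping range,
 * VREGION1, and (on ARM64) the low-globals window; FALSE is returned once
 * region_select exceeds the regions defined for this configuration.
 */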
3200 boolean_t
3201 pmap_virtual_region(
3202 unsigned int region_select,
3203 vm_map_offset_t *startp,
3204 vm_map_size_t *size
3205 )
3206 {
3207 boolean_t ret = FALSE;
3208 #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
3209 if (region_select == 0) {
3210 /*
3211 * In this config, the bootstrap mappings should occupy their own L2
3212 * TTs, as they should be immutable after boot. Having the associated
3213 * TTEs and PTEs in their own pages allows us to lock down those pages,
3214 * while allowing the rest of the kernel address range to be remapped.
3215 */
3216 #if (__ARM_VMSA__ > 7)
3217 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
3218 #else
3219 #error Unsupported configuration
3220 #endif
3221 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
3222 ret = TRUE;
3223 }
3224 #else
3225 #if (__ARM_VMSA__ > 7)
3226 unsigned long low_global_vr_mask = 0;
3227 vm_map_size_t low_global_vr_size = 0;
3228 #endif
3229
3230 if (region_select == 0) {
3231 #if (__ARM_VMSA__ == 7)
3232 *startp = gVirtBase & 0xFFC00000;
3233 *size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
3234 #else
3235 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
3236 if (!TEST_PAGE_SIZE_4K) {
3237 *startp = gVirtBase & 0xFFFFFFFFFE000000;
3238 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
3239 } else {
3240 *startp = gVirtBase & 0xFFFFFFFFFF800000;
3241 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
3242 }
3243 #endif
3244 ret = TRUE;
3245 }
3246 if (region_select == 1) {
3247 *startp = VREGION1_START;
3248 *size = VREGION1_SIZE;
3249 ret = TRUE;
3250 }
3251 #if (__ARM_VMSA__ > 7)
3252 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
3253 if (!TEST_PAGE_SIZE_4K) {
3254 low_global_vr_mask = 0xFFFFFFFFFE000000;
3255 low_global_vr_size = 0x2000000;
3256 } else {
3257 low_global_vr_mask = 0xFFFFFFFFFF800000;
3258 low_global_vr_size = 0x800000;
3259 }
3260
3261 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
3262 *startp = LOW_GLOBAL_BASE_ADDRESS;
3263 *size = low_global_vr_size;
3264 ret = TRUE;
3265 }
3266
3267 if (region_select == 3) {
3268 /* In this config, we allow the bootstrap mappings to occupy the same
3269 * page table pages as the heap.
3270 */
3271 *startp = VM_MIN_KERNEL_ADDRESS;
3272 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
3273 ret = TRUE;
3274 }
3275 #endif
3276 #endif
3277 return ret;
3278 }
3279
3280 unsigned int
3281 pmap_free_pages(
3282 void)
3283 {
3284 return (unsigned int)atop(avail_end - first_avail);
3285 }
3286
3287
3288 boolean_t
3289 pmap_next_page_hi(
3290 ppnum_t * pnum)
3291 {
3292 return pmap_next_page(pnum);
3293 }
3294
3295
3296 boolean_t
3297 pmap_next_page(
3298 ppnum_t *pnum)
3299 {
3300 if (first_avail != avail_end) {
3301 *pnum = (ppnum_t)atop(first_avail);
3302 first_avail += PAGE_SIZE;
3303 return TRUE;
3304 }
3305 return FALSE;
3306 }
3307
3308
3309 /*
3310 * Initialize the pmap module.
3311 * Called by vm_init, to initialize any structures that the pmap
3312 * system needs to map virtual memory.
3313 */
3314 void
3315 pmap_init(
3316 void)
3317 {
3318 /*
3319 * Protect page zero in the kernel map.
3320 * (can be overruled by permanent translation
3321 * table entries at page zero - see arm_vm_init).
3322 */
3323 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
3324
3325 pmap_initialized = TRUE;
3326
3327 pmap_zone_init();
3328
3329
3330 /*
3331 * Initialize the pmap object (for tracking the vm_page_t
3332 * structures for pages we allocate to be page tables in
3333 * pmap_expand()).
3334 */
3335 _vm_object_allocate(mem_size, pmap_object);
3336 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3337
3338 pv_init();
3339
3340 /*
3341 * The value of hard_maxproc may have been scaled; make sure
3342 * it is still less than the value of MAX_ASID.
3343 */
3344 assert(hard_maxproc < MAX_ASID);
3345
3346 #if CONFIG_PGTRACE
3347 pmap_pgtrace_init();
3348 #endif
3349 }
3350
3351 void
3352 pmap_pv_fixup(__unused vm_offset_t start, __unused vm_size_t length)
3353 {
3354 }
3355
3356 boolean_t
3357 pmap_verify_free(
3358 ppnum_t ppnum)
3359 {
3360 pv_entry_t **pv_h;
3361 int pai;
3362 pmap_paddr_t phys = ptoa(ppnum);
3363
3364 assert(phys != vm_page_fictitious_addr);
3365
3366 if (!pa_valid(phys)) {
3367 return FALSE;
3368 }
3369
3370 pai = (int)pa_index(phys);
3371 pv_h = pai_to_pvh(pai);
3372
3373 return pvh_test_type(pv_h, PVH_TYPE_NULL);
3374 }
3375
3376 #if MACH_ASSERT
3377 void
3378 pmap_assert_free(ppnum_t ppnum)
3379 {
3380 assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
3381 (void)ppnum;
3382 }
3383 #endif
3384
3385
3386 /*
3387 * Initialize zones used by pmap.
3388 */
3389 static void
3390 pmap_zone_init(
3391 void)
3392 {
3393 /*
3394 * Create the zone of physical maps
3395 * and the physical-to-virtual entries.
3396 */
3397 pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
3398 PAGE_SIZE, "pmap");
3399 }
3400
3401
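/*
 * Pmap-managed ledger allocation is not supported in this configuration;
 * these entry points exist only to satisfy the interface and panic if called.
 */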
3402 void
3403 pmap_ledger_alloc_init(size_t size)
3404 {
3405 panic("%s: unsupported, "
3406 "size=%lu",
3407 __func__, size);
3408 }
3409
3410 ledger_t
3411 pmap_ledger_alloc(void)
3412 {
3413 ledger_t retval = NULL;
3414
3415 panic("%s: unsupported",
3416 __func__);
3417
3418 return retval;
3419 }
3420
3421 void
3422 pmap_ledger_free(ledger_t ledger)
3423 {
3424 panic("%s: unsupported, "
3425 "ledger=%p",
3426 __func__, ledger);
3427 }
3428
3429 /*
3430 * Create and return a physical map.
3431 *
3432 * If the size specified for the map
3433 * is zero, the map is an actual physical
3434 * map, and may be referenced by the
3435 * hardware.
3436 *
3437 * If the size specified is non-zero,
3438 * the map will be used in software only, and
3439 * is bounded by that size.
3440 */
3441 MARK_AS_PMAP_TEXT static pmap_t
3442 pmap_create_internal(
3443 ledger_t ledger,
3444 vm_map_size_t size,
3445 boolean_t is_64bit)
3446 {
3447 unsigned i;
3448 pmap_t p;
3449
3450 /*
3451 * A software use-only map doesn't even need a pmap.
3452 */
3453 if (size != 0) {
3454 return PMAP_NULL;
3455 }
3456
3457
3458 /*
3459 * Allocate a pmap struct from the pmap_zone. Then allocate
3460 * the translation table of the right size for the pmap.
3461 */
3462 if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
3463 return PMAP_NULL;
3464 }
3465
3466 if (is_64bit) {
3467 p->min = MACH_VM_MIN_ADDRESS;
3468 p->max = MACH_VM_MAX_ADDRESS;
3469 } else {
3470 p->min = VM_MIN_ADDRESS;
3471 p->max = VM_MAX_ADDRESS;
3472 }
3473
3474 p->nested_region_true_start = 0;
3475 p->nested_region_true_end = ~0;
3476
3477 p->ref_count = 1;
3478 p->gc_status = 0;
3479 p->stamp = hw_atomic_add(&pmap_stamp, 1);
3480 p->nx_enabled = TRUE;
3481 p->is_64bit = is_64bit;
3482 p->nested = FALSE;
3483 p->nested_pmap = PMAP_NULL;
3484
3485
3486
3487 p->ledger = ledger;
3488
3489 PMAP_LOCK_INIT(p);
3490 #if (__ARM_VMSA__ == 7)
3491 simple_lock_init(&p->tt1_lock, 0);
3492 p->cpu_ref = 0;
3493 #endif
3494 memset((void *) &p->stats, 0, sizeof(p->stats));
3495
3496 p->tt_entry_free = (tt_entry_t *)0;
3497
3498 p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
3499 p->ttep = ml_static_vtop((vm_offset_t)p->tte);
3500 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
3501
3502 #if (__ARM_VMSA__ == 7)
3503 p->tte_index_max = NTTES;
3504 #else
3505 p->tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t));
3506 #endif
3507 p->prev_tte = (tt_entry_t *) NULL;
3508
3509 /* nullify the translation table */
3510 for (i = 0; i < p->tte_index_max; i++) {
3511 p->tte[i] = ARM_TTE_TYPE_FAULT;
3512 }
3513
3514 FLUSH_PTE_RANGE(p->tte, p->tte + p->tte_index_max);
3515
3516 /* assign an ASID */
3517 p->vasid = alloc_asid();
3518 p->asid = p->vasid % ARM_MAX_ASID;
3519
3520 /*
3521 * initialize the rest of the structure
3522 */
3523 p->nested_region_grand_addr = 0x0ULL;
3524 p->nested_region_subord_addr = 0x0ULL;
3525 p->nested_region_size = 0x0ULL;
3526 p->nested_region_asid_bitmap = NULL;
3527 p->nested_region_asid_bitmap_size = 0x0UL;
3528
3529 p->nested_has_no_bounds_ref = false;
3530 p->nested_no_bounds_refcnt = 0;
3531 p->nested_bounds_set = false;
3532
3533
3534 #if MACH_ASSERT
3535 p->pmap_stats_assert = TRUE;
3536 p->pmap_pid = 0;
3537 strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
3538 #endif /* MACH_ASSERT */
3539 #if DEVELOPMENT || DEBUG
3540 p->footprint_was_suspended = FALSE;
3541 #endif /* DEVELOPMENT || DEBUG */
3542
3543 pmap_simple_lock(&pmaps_lock);
3544 queue_enter(&map_pmap_list, p, pmap_t, pmaps);
3545 pmap_simple_unlock(&pmaps_lock);
3546
3547 return p;
3548 }
3549
3550 pmap_t
3551 pmap_create(
3552 ledger_t ledger,
3553 vm_map_size_t size,
3554 boolean_t is_64bit)
3555 {
3556 pmap_t pmap;
3557
3558 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, is_64bit);
3559
3560 ledger_reference(ledger);
3561
3562 pmap = pmap_create_internal(ledger, size, is_64bit);
3563
3564 if (pmap == PMAP_NULL) {
3565 ledger_dereference(ledger);
3566 }
3567
3568 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
3569
3570 return pmap;
3571 }
3572
3573 #if MACH_ASSERT
3574 MARK_AS_PMAP_TEXT static void
3575 pmap_set_process_internal(
3576 __unused pmap_t pmap,
3577 __unused int pid,
3578 __unused char *procname)
3579 {
3580 #if MACH_ASSERT
3581 if (pmap == NULL) {
3582 return;
3583 }
3584
3585 VALIDATE_PMAP(pmap);
3586
3587 pmap->pmap_pid = pid;
3588 strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
3589 if (pmap_ledgers_panic_leeway) {
3590 /*
3591 * XXX FBDP
3592 * Some processes somehow trigger some issues that make
3593 * the pmap stats and ledgers go off track, causing
3594 * some assertion failures and ledger panics.
3595 * Turn off the sanity checks if we allow some ledger leeway
3596 * because of that. We'll still do a final check in
3597 * pmap_check_ledgers() for discrepancies larger than the
3598 * allowed leeway after the address space has been fully
3599 * cleaned up.
3600 */
3601 pmap->pmap_stats_assert = FALSE;
3602 ledger_disable_panic_on_negative(pmap->ledger,
3603 task_ledgers.phys_footprint);
3604 ledger_disable_panic_on_negative(pmap->ledger,
3605 task_ledgers.internal);
3606 ledger_disable_panic_on_negative(pmap->ledger,
3607 task_ledgers.internal_compressed);
3608 ledger_disable_panic_on_negative(pmap->ledger,
3609 task_ledgers.iokit_mapped);
3610 ledger_disable_panic_on_negative(pmap->ledger,
3611 task_ledgers.alternate_accounting);
3612 ledger_disable_panic_on_negative(pmap->ledger,
3613 task_ledgers.alternate_accounting_compressed);
3614 }
3615 #endif /* MACH_ASSERT */
3616 }
3617 #endif /* MACH_ASSERT*/
3618
3619 #if MACH_ASSERT
3620 void
3621 pmap_set_process(
3622 pmap_t pmap,
3623 int pid,
3624 char *procname)
3625 {
3626 pmap_set_process_internal(pmap, pid, procname);
3627 }
3628
3629 /*
3630 * We maintain stats and ledgers so that a task's physical footprint is:
3631 * phys_footprint = ((internal - alternate_accounting)
3632 * + (internal_compressed - alternate_accounting_compressed)
3633 * + iokit_mapped
3634 * + purgeable_nonvolatile
3635 * + purgeable_nonvolatile_compressed
3636 * + page_table)
3637 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
3638 */
3639
3640 struct {
3641 uint64_t num_pmaps_checked;
3642
3643 int phys_footprint_over;
3644 ledger_amount_t phys_footprint_over_total;
3645 ledger_amount_t phys_footprint_over_max;
3646 int phys_footprint_under;
3647 ledger_amount_t phys_footprint_under_total;
3648 ledger_amount_t phys_footprint_under_max;
3649
3650 int internal_over;
3651 ledger_amount_t internal_over_total;
3652 ledger_amount_t internal_over_max;
3653 int internal_under;
3654 ledger_amount_t internal_under_total;
3655 ledger_amount_t internal_under_max;
3656
3657 int internal_compressed_over;
3658 ledger_amount_t internal_compressed_over_total;
3659 ledger_amount_t internal_compressed_over_max;
3660 int internal_compressed_under;
3661 ledger_amount_t internal_compressed_under_total;
3662 ledger_amount_t internal_compressed_under_max;
3663
3664 int iokit_mapped_over;
3665 ledger_amount_t iokit_mapped_over_total;
3666 ledger_amount_t iokit_mapped_over_max;
3667 int iokit_mapped_under;
3668 ledger_amount_t iokit_mapped_under_total;
3669 ledger_amount_t iokit_mapped_under_max;
3670
3671 int alternate_accounting_over;
3672 ledger_amount_t alternate_accounting_over_total;
3673 ledger_amount_t alternate_accounting_over_max;
3674 int alternate_accounting_under;
3675 ledger_amount_t alternate_accounting_under_total;
3676 ledger_amount_t alternate_accounting_under_max;
3677
3678 int alternate_accounting_compressed_over;
3679 ledger_amount_t alternate_accounting_compressed_over_total;
3680 ledger_amount_t alternate_accounting_compressed_over_max;
3681 int alternate_accounting_compressed_under;
3682 ledger_amount_t alternate_accounting_compressed_under_total;
3683 ledger_amount_t alternate_accounting_compressed_under_max;
3684
3685 int page_table_over;
3686 ledger_amount_t page_table_over_total;
3687 ledger_amount_t page_table_over_max;
3688 int page_table_under;
3689 ledger_amount_t page_table_under_total;
3690 ledger_amount_t page_table_under_max;
3691
3692 int purgeable_volatile_over;
3693 ledger_amount_t purgeable_volatile_over_total;
3694 ledger_amount_t purgeable_volatile_over_max;
3695 int purgeable_volatile_under;
3696 ledger_amount_t purgeable_volatile_under_total;
3697 ledger_amount_t purgeable_volatile_under_max;
3698
3699 int purgeable_nonvolatile_over;
3700 ledger_amount_t purgeable_nonvolatile_over_total;
3701 ledger_amount_t purgeable_nonvolatile_over_max;
3702 int purgeable_nonvolatile_under;
3703 ledger_amount_t purgeable_nonvolatile_under_total;
3704 ledger_amount_t purgeable_nonvolatile_under_max;
3705
3706 int purgeable_volatile_compressed_over;
3707 ledger_amount_t purgeable_volatile_compressed_over_total;
3708 ledger_amount_t purgeable_volatile_compressed_over_max;
3709 int purgeable_volatile_compressed_under;
3710 ledger_amount_t purgeable_volatile_compressed_under_total;
3711 ledger_amount_t purgeable_volatile_compressed_under_max;
3712
3713 int purgeable_nonvolatile_compressed_over;
3714 ledger_amount_t purgeable_nonvolatile_compressed_over_total;
3715 ledger_amount_t purgeable_nonvolatile_compressed_over_max;
3716 int purgeable_nonvolatile_compressed_under;
3717 ledger_amount_t purgeable_nonvolatile_compressed_under_total;
3718 ledger_amount_t purgeable_nonvolatile_compressed_under_max;
3719
3720 int network_volatile_over;
3721 ledger_amount_t network_volatile_over_total;
3722 ledger_amount_t network_volatile_over_max;
3723 int network_volatile_under;
3724 ledger_amount_t network_volatile_under_total;
3725 ledger_amount_t network_volatile_under_max;
3726
3727 int network_nonvolatile_over;
3728 ledger_amount_t network_nonvolatile_over_total;
3729 ledger_amount_t network_nonvolatile_over_max;
3730 int network_nonvolatile_under;
3731 ledger_amount_t network_nonvolatile_under_total;
3732 ledger_amount_t network_nonvolatile_under_max;
3733
3734 int network_volatile_compressed_over;
3735 ledger_amount_t network_volatile_compressed_over_total;
3736 ledger_amount_t network_volatile_compressed_over_max;
3737 int network_volatile_compressed_under;
3738 ledger_amount_t network_volatile_compressed_under_total;
3739 ledger_amount_t network_volatile_compressed_under_max;
3740
3741 int network_nonvolatile_compressed_over;
3742 ledger_amount_t network_nonvolatile_compressed_over_total;
3743 ledger_amount_t network_nonvolatile_compressed_over_max;
3744 int network_nonvolatile_compressed_under;
3745 ledger_amount_t network_nonvolatile_compressed_under_total;
3746 ledger_amount_t network_nonvolatile_compressed_under_max;
3747 } pmap_ledgers_drift;
3748 #endif /* MACH_ASSERT */
3749
3750 /*
3751 * Retire the given physical map from service.
3752 * Should only be called if the map contains
3753 * no valid mappings.
3754 */
3755 MARK_AS_PMAP_TEXT static void
3756 pmap_destroy_internal(
3757 pmap_t pmap)
3758 {
3759 if (pmap == PMAP_NULL) {
3760 return;
3761 }
3762
3763 VALIDATE_PMAP(pmap);
3764
3765 int32_t ref_count = __c11_atomic_fetch_sub(&pmap->ref_count, 1, memory_order_relaxed) - 1;
3766 if (ref_count > 0) {
3767 return;
3768 } else if (ref_count < 0) {
3769 panic("pmap %p: refcount underflow", pmap);
3770 } else if (pmap == kernel_pmap) {
3771 panic("pmap %p: attempt to destroy kernel pmap", pmap);
3772 }
3773
3774 #if (__ARM_VMSA__ == 7)
3775 pt_entry_t *ttep;
3776 unsigned int i;
3777
3778 pmap_simple_lock(&pmaps_lock);
3779 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
3780 pmap->gc_status |= PMAP_GC_WAIT;
3781 assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
3782 pmap_simple_unlock(&pmaps_lock);
3783 (void) thread_block(THREAD_CONTINUE_NULL);
3784 pmap_simple_lock(&pmaps_lock);
3785 }
3786 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
3787 pmap_simple_unlock(&pmaps_lock);
3788
3789 if (pmap->cpu_ref != 0) {
3790 panic("pmap_destroy(%p): cpu_ref = %u", pmap, pmap->cpu_ref);
3791 }
3792
3793 pmap_trim_self(pmap);
3794
3795 /*
3796 * Free the memory maps, then the
3797 * pmap structure.
3798 */
3799 PMAP_LOCK(pmap);
3800 for (i = 0; i < pmap->tte_index_max; i++) {
3801 ttep = &pmap->tte[i];
3802 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3803 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
3804 }
3805 }
3806 PMAP_UNLOCK(pmap);
3807
3808 if (pmap->tte) {
3809 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
3810 pmap->tte = (tt_entry_t *) NULL;
3811 pmap->ttep = 0;
3812 pmap->tte_index_max = 0;
3813 }
3814 if (pmap->prev_tte) {
3815 pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
3816 pmap->prev_tte = (tt_entry_t *) NULL;
3817 }
3818 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3819
3820 flush_mmu_tlb_asid(pmap->asid);
3821 /* return its asid to the pool */
3822 free_asid(pmap->vasid);
3823 pmap_check_ledgers(pmap);
3824
3825
3826 if (pmap->nested_region_asid_bitmap) {
3827 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
3828 }
3829 zfree(pmap_zone, pmap);
3830 #else /* __ARM_VMSA__ == 7 */
3831 pt_entry_t *ttep;
3832 pmap_paddr_t pa;
3833 vm_map_address_t c;
3834
3835 pmap_unmap_sharedpage(pmap);
3836
3837 pmap_simple_lock(&pmaps_lock);
3838 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
3839 pmap->gc_status |= PMAP_GC_WAIT;
3840 assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
3841 pmap_simple_unlock(&pmaps_lock);
3842 (void) thread_block(THREAD_CONTINUE_NULL);
3843 pmap_simple_lock(&pmaps_lock);
3844 }
3845 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
3846 pmap_simple_unlock(&pmaps_lock);
3847
3848 pmap_trim_self(pmap);
3849
3850 /*
3851 * Free the memory maps, then the
3852 * pmap structure.
3853 */
3854 for (c = pmap->min; c < pmap->max; c += ARM_TT_L2_SIZE) {
3855 ttep = pmap_tt2e(pmap, c);
3856 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3857 PMAP_LOCK(pmap);
3858 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L2_LEVEL);
3859 PMAP_UNLOCK(pmap);
3860 }
3861 }
3862 #if !__ARM64_TWO_LEVEL_PMAP__
3863 for (c = pmap->min; c < pmap->max; c += ARM_TT_L1_SIZE) {
3864 ttep = pmap_tt1e(pmap, c);
3865 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3866 PMAP_LOCK(pmap);
3867 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
3868 PMAP_UNLOCK(pmap);
3869 }
3870 }
3871 #endif
3872
3873
3874 if (pmap->tte) {
3875 pa = pmap->ttep;
3876 pmap_tt1_deallocate(pmap, (tt_entry_t *)phystokv(pa), PMAP_ROOT_ALLOC_SIZE, 0);
3877 }
3878
3879 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3880 flush_mmu_tlb_asid((uint64_t)(pmap->asid) << TLBI_ASID_SHIFT);
3881 free_asid(pmap->vasid);
3882
3883 if (pmap->nested_region_asid_bitmap) {
3884 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
3885 }
3886
3887 pmap_check_ledgers(pmap);
3888
3889 zfree(pmap_zone, pmap);
3890
3891 #endif /* __ARM_VMSA__ == 7 */
3892 }
3893
3894 void
3895 pmap_destroy(
3896 pmap_t pmap)
3897 {
3898 ledger_t ledger;
3899
3900 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
3901
3902 ledger = pmap->ledger;
3903
3904 pmap_destroy_internal(pmap);
3905
3906 ledger_dereference(ledger);
3907
3908 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
3909 }
3910
3911
3912 /*
3913 * Add a reference to the specified pmap.
3914 */
3915 MARK_AS_PMAP_TEXT static void
3916 pmap_reference_internal(
3917 pmap_t pmap)
3918 {
3919 if (pmap != PMAP_NULL) {
3920 VALIDATE_PMAP(pmap);
3921 __c11_atomic_fetch_add(&pmap->ref_count, 1, memory_order_relaxed);
3922 }
3923 }
3924
3925 void
3926 pmap_reference(
3927 pmap_t pmap)
3928 {
3929 pmap_reference_internal(pmap);
3930 }
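/*
 * A minimal sketch of the reference-counting contract (hypothetical caller):
 * every pmap_reference() must eventually be balanced by a pmap_destroy(),
 * and the teardown in pmap_destroy_internal() only runs once the count
 * reaches zero.
 *
 *	pmap_reference(map);	// ref_count: n -> n + 1
 *	...
 *	pmap_destroy(map);	// ref_count: n + 1 -> n; no teardown while n > 0
 */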
3931
3932 static tt_entry_t *
3933 pmap_tt1_allocate(
3934 pmap_t pmap,
3935 vm_size_t size,
3936 unsigned option)
3937 {
3938 tt_entry_t *tt1;
3939 tt_free_entry_t *tt1_free;
3940 pmap_paddr_t pa;
3941 vm_address_t va;
3942 vm_address_t va_end;
3943 kern_return_t ret;
3944
3945 pmap_simple_lock(&pmaps_lock);
3946 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
3947 free_page_size_tt_count--;
3948 tt1 = (tt_entry_t *)free_page_size_tt_list;
3949 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
3950 pmap_simple_unlock(&pmaps_lock);
3951 pmap_tt_ledger_credit(pmap, size);
3952 return (tt_entry_t *)tt1;
3953 }
3955 if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
3956 free_two_page_size_tt_count--;
3957 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
3958 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
3959 pmap_simple_unlock(&pmaps_lock);
3960 pmap_tt_ledger_credit(pmap, size);
3961 return (tt_entry_t *)tt1;
3962 }
3964 if (free_tt_count != 0) {
3965 free_tt_count--;
3966 tt1 = (tt_entry_t *)free_tt_list;
3967 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
3968 pmap_simple_unlock(&pmaps_lock);
3969 pmap_tt_ledger_credit(pmap, size);
3970 return (tt_entry_t *)tt1;
3971 }
3972
3973 pmap_simple_unlock(&pmaps_lock);
3974
3975 ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
3976
3977 if (ret == KERN_RESOURCE_SHORTAGE) {
3978 return (tt_entry_t *)0;
3979 }
3980
3981
3982 if (size < PAGE_SIZE) {
3983 pmap_simple_lock(&pmaps_lock);
3984
3985 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + size; va < va_end; va = va + size) {
3986 tt1_free = (tt_free_entry_t *)va;
3987 tt1_free->next = free_tt_list;
3988 free_tt_list = tt1_free;
3989 free_tt_count++;
3990 }
3991 if (free_tt_count > free_tt_max) {
3992 free_tt_max = free_tt_count;
3993 }
3994
3995 pmap_simple_unlock(&pmaps_lock);
3996 }
3997
3998 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
3999 * Depending on the device, this can vary between 512 bytes and 16KB. */
4000 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4001 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
4002 pmap_tt_ledger_credit(pmap, size);
4003
4004 return (tt_entry_t *) phystokv(pa);
4005 }
4006
4007 static void
4008 pmap_tt1_deallocate(
4009 pmap_t pmap,
4010 tt_entry_t *tt,
4011 vm_size_t size,
4012 unsigned option)
4013 {
4014 tt_free_entry_t *tt_entry;
4015
4016 tt_entry = (tt_free_entry_t *)tt;
4017 if (not_in_kdp) {
4018 pmap_simple_lock(&pmaps_lock);
4019 }
4020
4021 if (size < PAGE_SIZE) {
4022 free_tt_count++;
4023 if (free_tt_count > free_tt_max) {
4024 free_tt_max = free_tt_count;
4025 }
4026 tt_entry->next = free_tt_list;
4027 free_tt_list = tt_entry;
4028 }
4029
4030 if (size == PAGE_SIZE) {
4031 free_page_size_tt_count++;
4032 if (free_page_size_tt_count > free_page_size_tt_max) {
4033 free_page_size_tt_max = free_page_size_tt_count;
4034 }
4035 tt_entry->next = free_page_size_tt_list;
4036 free_page_size_tt_list = tt_entry;
4037 }
4038
4039 if (size == 2 * PAGE_SIZE) {
4040 free_two_page_size_tt_count++;
4041 if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
4042 free_two_page_size_tt_max = free_two_page_size_tt_count;
4043 }
4044 tt_entry->next = free_two_page_size_tt_list;
4045 free_two_page_size_tt_list = tt_entry;
4046 }
4047
4048 if ((option & PMAP_TT_DEALLOCATE_NOBLOCK) || (!not_in_kdp)) {
4049 if (not_in_kdp) {
4050 pmap_simple_unlock(&pmaps_lock);
4051 }
4052 pmap_tt_ledger_debit(pmap, size);
4053 return;
4054 }
4055
4056 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
4057 free_page_size_tt_count--;
4058 tt = (tt_entry_t *)free_page_size_tt_list;
4059 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
4060
4061 pmap_simple_unlock(&pmaps_lock);
4062
4063 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
4064
4065 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4066
4067 pmap_simple_lock(&pmaps_lock);
4068 }
4069
4070 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
4071 free_two_page_size_tt_count--;
4072 tt = (tt_entry_t *)free_two_page_size_tt_list;
4073 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
4074
4075 pmap_simple_unlock(&pmaps_lock);
4076
4077 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);
4078
4079 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4080
4081 pmap_simple_lock(&pmaps_lock);
4082 }
4083 pmap_simple_unlock(&pmaps_lock);
4084 pmap_tt_ledger_debit(pmap, size);
4085 }
4086
4087 static kern_return_t
4088 pmap_tt_allocate(
4089 pmap_t pmap,
4090 tt_entry_t **ttp,
4091 unsigned int level,
4092 unsigned int options)
4093 {
4094 pmap_paddr_t pa;
4095 *ttp = NULL;
4096
4097 PMAP_LOCK(pmap);
4098 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
4099 tt_free_entry_t *tt_free_next;
4100
4101 tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
4102 *ttp = (tt_entry_t *)pmap->tt_entry_free;
4103 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
4104 }
4105 PMAP_UNLOCK(pmap);
4106
4107 if (*ttp == NULL) {
4108 pt_desc_t *ptdp;
4109
4110 /*
4111 * Allocate a VM page for the page table entries at the requested level.
4112 */
4113 while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
4114 if (options & PMAP_OPTIONS_NOWAIT) {
4115 return KERN_RESOURCE_SHORTAGE;
4116 }
4117 VM_PAGE_WAIT();
4118 }
4119
4120 while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
4121 if (options & PMAP_OPTIONS_NOWAIT) {
4122 pmap_pages_free(pa, PAGE_SIZE);
4123 return KERN_RESOURCE_SHORTAGE;
4124 }
4125 VM_PAGE_WAIT();
4126 }
4127
4128 if (level < PMAP_TT_MAX_LEVEL) {
4129 OSAddAtomic64(1, &alloc_ttepages_count);
4130 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
4131 } else {
4132 OSAddAtomic64(1, &alloc_ptepages_count);
4133 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
4134 }
4135
4136 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
4137
4138 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
4139
4140 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
4141
4142 __unreachable_ok_push
4143 if (TEST_PAGE_RATIO_4) {
4144 vm_address_t va;
4145 vm_address_t va_end;
4146
4147 PMAP_LOCK(pmap);
4148
4149 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
4150 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4151 pmap->tt_entry_free = (tt_entry_t *)va;
4152 }
4153 PMAP_UNLOCK(pmap);
4154 }
4155 __unreachable_ok_pop
4156
4157 *ttp = (tt_entry_t *)phystokv(pa);
4158 }
4159
4160
4161 return KERN_SUCCESS;
4162 }
4163
4164
4165 static void
4166 pmap_tt_deallocate(
4167 pmap_t pmap,
4168 tt_entry_t *ttp,
4169 unsigned int level)
4170 {
4171 pt_desc_t *ptdp;
4172 unsigned pt_acc_cnt;
4173 unsigned i, max_pt_index = PAGE_RATIO;
4174 vm_offset_t free_page = 0;
4175
4176 PMAP_LOCK(pmap);
4177
4178 ptdp = ptep_get_ptd((vm_offset_t)ttp);
4179
4180 ptdp->pt_map[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;
4181
4182 if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
4183 ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
4184 }
4185
4186 if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
4187 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt);
4188 }
4189
4190 ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
4191
4192 for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
4193 pt_acc_cnt += ptdp->pt_cnt[i].refcnt;
4194 }
4195
4196 if (pt_acc_cnt == 0) {
4197 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
4198 unsigned pt_free_entry_cnt = 1;
4199
4200 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
4201 tt_free_entry_t *tt_free_list_next;
4202
4203 tt_free_list_next = tt_free_list->next;
4204 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
4205 pt_free_entry_cnt++;
4206 }
4207 tt_free_list = tt_free_list_next;
4208 }
4209 if (pt_free_entry_cnt == max_pt_index) {
4210 tt_free_entry_t *tt_free_list_cur;
4211
4212 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
4213 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
4214 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
4215
4216 while (tt_free_list_cur) {
4217 tt_free_entry_t *tt_free_list_next;
4218
4219 tt_free_list_next = tt_free_list_cur->next;
4220 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
4221 tt_free_list->next = tt_free_list_next->next;
4222 } else {
4223 tt_free_list = tt_free_list_next;
4224 }
4225 tt_free_list_cur = tt_free_list_next;
4226 }
4227 } else {
4228 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4229 pmap->tt_entry_free = ttp;
4230 }
4231 } else {
4232 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4233 pmap->tt_entry_free = ttp;
4234 }
4235
4236 PMAP_UNLOCK(pmap);
4237
4238 if (free_page != 0) {
4239 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
4240 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
4241 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
4242 if (level < PMAP_TT_MAX_LEVEL) {
4243 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
4244 } else {
4245 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
4246 }
4247 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
4248 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
4249 }
4250 }
4251
4252 static void
4253 pmap_tte_remove(
4254 pmap_t pmap,
4255 tt_entry_t *ttep,
4256 unsigned int level)
4257 {
4258 tt_entry_t tte = *ttep;
4259
4260 if (tte == 0) {
4261 panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
4262 }
4263
4264 if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
4265 panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
4266 tte_get_ptd(tte), (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
4267 }
4268
4269 #if (__ARM_VMSA__ == 7)
4270 {
4271 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
4272 unsigned i;
4273
4274 for (i = 0; i < 4; i++, ttep_4M++) {
4275 *ttep_4M = (tt_entry_t) 0;
4276 }
4277 FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
4278 }
4279 #else
4280 *ttep = (tt_entry_t) 0;
4281 FLUSH_PTE_STRONG(ttep);
4282 #endif
4283 }
4284
4285 static void
4286 pmap_tte_deallocate(
4287 pmap_t pmap,
4288 tt_entry_t *ttep,
4289 unsigned int level)
4290 {
4291 pmap_paddr_t pa;
4292 tt_entry_t tte;
4293
4294 PMAP_ASSERT_LOCKED(pmap);
4295
4296 tte = *ttep;
4297
4298 #if MACH_ASSERT
4299 if (tte_get_ptd(tte)->pmap != pmap) {
4300 panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
4301 tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
4302 }
4303 #endif
4304
4305 pmap_tte_remove(pmap, ttep, level);
4306
4307 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4308 #if MACH_ASSERT
4309 {
4310 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
4311 unsigned i;
4312
4313 for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
4314 if (ARM_PTE_IS_COMPRESSED(*pte_p)) {
4315 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
4316 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4317 } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
4318 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
4319 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4320 }
4321 }
4322 }
4323 #endif
4324 PMAP_UNLOCK(pmap);
4325
4326 /* Clear any page offset: we mean to free the whole page, but ARMv7 TTEs may be
4327 * aligned only on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
4328 pa = tte_to_pa(tte) & ~ARM_PGMASK;
4329 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
4330 PMAP_LOCK(pmap);
4331 }
4332 }
4333
4334 /*
4335 * Remove a range of hardware page-table entries.
4336 * The entries given are the first (inclusive)
4337 * and last (exclusive) entries for the VM pages.
4338 * The virtual address is the va for the first pte.
4339 *
4340 * The pmap must be locked.
4341 * If the pmap is not the kernel pmap, the range must lie
4342 * entirely within one pte-page. This is NOT checked.
4343 * Assumes that the pte-page exists.
4344 *
4345 * Returns the number of PTEs changed, and sets *rmv_cnt
4346 * to the number of SPTEs changed.
4347 */
4348 static int
4349 pmap_remove_range(
4350 pmap_t pmap,
4351 vm_map_address_t va,
4352 pt_entry_t *bpte,
4353 pt_entry_t *epte,
4354 uint32_t *rmv_cnt)
4355 {
4356 return pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
4357 PMAP_OPTIONS_REMOVE);
4358 }
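/*
 * A minimal sketch of a hypothetical caller, mirroring how
 * pmap_remove_options_internal() below derives the PTE bounds; the
 * NULL/type checks on the TTE are omitted here for brevity:
 *
 *	PMAP_LOCK(pmap);
 *	pt_entry_t *pte_p = (pt_entry_t *) ttetokv(*pmap_tte(pmap, start));
 *	pt_entry_t *bpte = &pte_p[ptenum(start)];
 *	pt_entry_t *epte = bpte + ((end - start) >> ARM_TT_LEAF_SHIFT);
 *	uint32_t removed = 0;
 *	(void) pmap_remove_range(pmap, start, bpte, epte, &removed);
 *	PMAP_UNLOCK(pmap);
 */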
4359
4360
4361 #ifdef PVH_FLAG_EXEC
4362
4363 /*
4364 * Update the access protection bits of the physical aperture mapping for a page.
4365 * This is useful, for example, in guaranteeing that a verified executable page
4366 * has no writable mappings anywhere in the system, including the physical
4367 * aperture. flush_tlb_async can be set to true to avoid unnecessary TLB
4368 * synchronization overhead in cases where the call to this function is
4369 * guaranteed to be followed by other TLB operations.
4370 */
4371 static void
4372 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
4373 {
4374 #if __ARM_PTE_PHYSMAP__
4375 ASSERT_PVH_LOCKED(pai);
4376 vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
4377 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
4378
4379 pt_entry_t tmplate = *pte_p;
4380 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
4381 return;
4382 }
4383 tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
4384 #if (__ARM_VMSA__ > 7)
4385 if (tmplate & ARM_PTE_HINT_MASK) {
4386 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
4387 __func__, pte_p, (void *)kva, tmplate);
4388 }
4389 #endif
4390 WRITE_PTE_STRONG(pte_p, tmplate);
4391 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
4392 if (!flush_tlb_async) {
4393 sync_tlb_flush();
4394 }
4395 #endif
4396 }
4397
4398 #endif /* defined(PVH_FLAG_EXEC) */
4399
4400 static void
4401 pmap_remove_pv(
4402 pmap_t pmap,
4403 pt_entry_t *cpte,
4404 int pai,
4405 int *num_internal,
4406 int *num_alt_internal,
4407 int *num_reusable,
4408 int *num_external)
4409 {
4410 pv_entry_t **pv_h, **pve_pp;
4411 pv_entry_t *pve_p;
4412
4413 ASSERT_PVH_LOCKED(pai);
4414 pv_h = pai_to_pvh(pai);
4415 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
4416
4417
4418 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
4419 if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
4420 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
4421 }
4422 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
4423 assert(IS_INTERNAL_PAGE(pai));
4424 (*num_internal)++;
4425 (*num_alt_internal)++;
4426 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
4427 } else if (IS_INTERNAL_PAGE(pai)) {
4428 if (IS_REUSABLE_PAGE(pai)) {
4429 (*num_reusable)++;
4430 } else {
4431 (*num_internal)++;
4432 }
4433 } else {
4434 (*num_external)++;
4435 }
4436 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
4437 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
4438 pve_pp = pv_h;
4439 pve_p = pvh_list(pv_h);
4440
4441 while (pve_p != PV_ENTRY_NULL &&
4442 (pve_get_ptep(pve_p) != cpte)) {
4443 pve_pp = pve_link_field(pve_p);
4444 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4445 }
4446
4447 if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
4448 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
4449 }
4450
4451 #if MACH_ASSERT
4452 if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
4453 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4454 while (check_pve_p != PV_ENTRY_NULL) {
4455 if (pve_get_ptep(check_pve_p) == cpte) {
4456 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
4457 __func__, cpte, pmap, pv_h, pve_p, pai);
4458 }
4459 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
4460 }
4461 }
4462 #endif
4463
4464 if (IS_ALTACCT_PAGE(pai, pve_p)) {
4465 assert(IS_INTERNAL_PAGE(pai));
4466 (*num_internal)++;
4467 (*num_alt_internal)++;
4468 CLR_ALTACCT_PAGE(pai, pve_p);
4469 } else if (IS_INTERNAL_PAGE(pai)) {
4470 if (IS_REUSABLE_PAGE(pai)) {
4471 (*num_reusable)++;
4472 } else {
4473 (*num_internal)++;
4474 }
4475 } else {
4476 (*num_external)++;
4477 }
4478
4479 pvh_remove(pv_h, pve_pp, pve_p);
4480 pv_free(pve_p);
4481 if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
4482 pvh_set_flags(pv_h, pvh_flags);
4483 }
4484 } else {
4485 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
4486 __func__, *pv_h, cpte, pmap, pv_h, pai);
4487 }
4488
4489 #ifdef PVH_FLAG_EXEC
4490 if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
4491 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
4492 }
4493 #endif
4494 }
4495
4496 static int
4497 pmap_remove_range_options(
4498 pmap_t pmap,
4499 vm_map_address_t va,
4500 pt_entry_t *bpte,
4501 pt_entry_t *epte,
4502 uint32_t *rmv_cnt,
4503 int options)
4504 {
4505 pt_entry_t *cpte;
4506 int num_removed, num_unwired;
4507 int num_pte_changed;
4508 int pai = 0;
4509 pmap_paddr_t pa;
4510 int num_external, num_internal, num_reusable;
4511 int num_alt_internal;
4512 uint64_t num_compressed, num_alt_compressed;
4513
4514 PMAP_ASSERT_LOCKED(pmap);
4515
4516 num_removed = 0;
4517 num_unwired = 0;
4518 num_pte_changed = 0;
4519 num_external = 0;
4520 num_internal = 0;
4521 num_reusable = 0;
4522 num_compressed = 0;
4523 num_alt_internal = 0;
4524 num_alt_compressed = 0;
4525
4526 for (cpte = bpte; cpte < epte;
4527 cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
4528 pt_entry_t spte;
4529 boolean_t managed = FALSE;
4530
4531 spte = *cpte;
4532
4533 #if CONFIG_PGTRACE
4534 if (pgtrace_enabled) {
4535 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
4536 }
4537 #endif
4538
4539 while (!managed) {
4540 if (pmap != kernel_pmap &&
4541 (options & PMAP_OPTIONS_REMOVE) &&
4542 (ARM_PTE_IS_COMPRESSED(spte))) {
4543 /*
4544 * "pmap" must be locked at this point,
4545 * so this should not race with another
4546 * pmap_remove_range() or pmap_enter().
4547 */
4548
4549 /* one less "compressed"... */
4550 num_compressed++;
4551 if (spte & ARM_PTE_COMPRESSED_ALT) {
4552 /* ... but it used to be "ALTACCT" */
4553 num_alt_compressed++;
4554 }
4555
4556 /* clear marker */
4557 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4558 /*
4559 * "refcnt" also accounts for
4560 * our "compressed" markers,
4561 * so let's update it here.
4562 */
4563 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
4564 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4565 }
4566 spte = *cpte;
4567 }
4568 /*
4569 * It may be possible for the pte to transition from managed
4570 * to unmanaged in this timeframe; for now, elide the assert.
4571 * We should break out as a consequence of checking pa_valid.
4572 */
4573 //assert(!ARM_PTE_IS_COMPRESSED(spte));
4574 pa = pte_to_pa(spte);
4575 if (!pa_valid(pa)) {
4576 break;
4577 }
4578 pai = (int)pa_index(pa);
4579 LOCK_PVH(pai);
4580 spte = *cpte;
4581 pa = pte_to_pa(spte);
4582 if (pai == (int)pa_index(pa)) {
4583 managed = TRUE;
4584 break; // Leave pai locked as we will unlock it after we free the PV entry
4585 }
4586 UNLOCK_PVH(pai);
4587 }
4588
4589 if (ARM_PTE_IS_COMPRESSED(*cpte)) {
4590 /*
4591 * There used to be a valid mapping here but it
4592 * has already been removed when the page was
4593 * sent to the VM compressor, so nothing left to
4594 * remove now...
4595 */
4596 continue;
4597 }
4598
4599 /* remove the translation, do not flush the TLB */
4600 if (*cpte != ARM_PTE_TYPE_FAULT) {
4601 assert(!ARM_PTE_IS_COMPRESSED(*cpte));
4602 #if MACH_ASSERT
4603 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
4604 panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
4605 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
4606 }
4607 #endif
4608 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4609 num_pte_changed++;
4610 }
4611
4612 if ((spte != ARM_PTE_TYPE_FAULT) &&
4613 (pmap != kernel_pmap)) {
4614 assert(!ARM_PTE_IS_COMPRESSED(spte));
4615 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
4616 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4617 }
4618 if (rmv_cnt) {
4619 (*rmv_cnt)++;
4620 }
4621 }
4622
4623 if (pte_is_wired(spte)) {
4624 pte_set_wired(cpte, 0);
4625 num_unwired++;
4626 }
4627 /*
4628 * if not managed, we're done
4629 */
4630 if (!managed) {
4631 continue;
4632 }
4633 /*
4634 * find and remove the mapping from the chain for this
4635 * physical address.
4636 */
4637
4638 pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);
4639
4640 UNLOCK_PVH(pai);
4641 num_removed++;
4642 }
4643
4644 /*
4645 * Update the counts
4646 */
4647 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
4648 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
4649
4650 if (pmap != kernel_pmap) {
4651 /* sanity checks... */
4652 #if MACH_ASSERT
4653 if (pmap->stats.internal < num_internal) {
4654 if ((!pmap_stats_assert ||
4655 !pmap->pmap_stats_assert)) {
4656 printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
4657 pmap->pmap_pid,
4658 pmap->pmap_procname,
4659 pmap,
4660 (uint64_t) va,
4661 bpte,
4662 epte,
4663 options,
4664 num_internal,
4665 num_removed,
4666 num_unwired,
4667 num_external,
4668 num_reusable,
4669 num_compressed,
4670 num_alt_internal,
4671 num_alt_compressed,
4672 num_pte_changed,
4673 pmap->stats.internal,
4674 pmap->stats.reusable);
4675 } else {
4676 panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
4677 pmap->pmap_pid,
4678 pmap->pmap_procname,
4679 pmap,
4680 (uint64_t) va,
4681 bpte,
4682 epte,
4683 options,
4684 num_internal,
4685 num_removed,
4686 num_unwired,
4687 num_external,
4688 num_reusable,
4689 num_compressed,
4690 num_alt_internal,
4691 num_alt_compressed,
4692 num_pte_changed,
4693 pmap->stats.internal,
4694 pmap->stats.reusable);
4695 }
4696 }
4697 #endif /* MACH_ASSERT */
4698 PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
4699 pmap,
4700 "pmap=%p num_external=%d stats.external=%d",
4701 pmap, num_external, pmap->stats.external);
4702 PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
4703 pmap,
4704 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4705 pmap,
4706 num_internal, pmap->stats.internal,
4707 num_reusable, pmap->stats.reusable);
4708 PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
4709 pmap,
4710 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4711 pmap,
4712 num_internal, pmap->stats.internal,
4713 num_reusable, pmap->stats.reusable);
4714 PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
4715 pmap,
4716 "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
4717 pmap, num_compressed, num_alt_compressed,
4718 pmap->stats.compressed);
4719
4720 /* update pmap stats... */
4721 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
4722 if (num_external) {
4723 OSAddAtomic(-num_external, &pmap->stats.external);
4724 }
4725 if (num_internal) {
4726 OSAddAtomic(-num_internal, &pmap->stats.internal);
4727 }
4728 if (num_reusable) {
4729 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
4730 }
4731 if (num_compressed) {
4732 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
4733 }
4734 /* ... and ledgers */
4735 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
4736 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
4737 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
4738 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
4739 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
4740 /* make needed adjustments to phys_footprint */
4741 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
4742 machine_ptob((num_internal -
4743 num_alt_internal) +
4744 (num_compressed -
4745 num_alt_compressed)));
4746 }
4747
4748 /* flush the ptable entries we have written */
4749 if (num_pte_changed > 0) {
4750 FLUSH_PTE_RANGE_STRONG(bpte, epte);
4751 }
4752
4753 return num_pte_changed;
4754 }
4755
4756
4757 /*
4758 * Remove the given range of addresses
4759 * from the specified map.
4760 *
4761 * It is assumed that the start and end are properly
4762 * rounded to the hardware page size.
4763 */
4764 void
4765 pmap_remove(
4766 pmap_t pmap,
4767 vm_map_address_t start,
4768 vm_map_address_t end)
4769 {
4770 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
4771 }
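/*
 * A minimal usage sketch (hypothetical values): callers pass bounds that
 * are already rounded to the hardware page size, e.g. with the usual
 * trunc_page()/round_page() helpers:
 *
 *	pmap_remove(map, trunc_page(va), round_page(va + len));
 */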
4772
4773 MARK_AS_PMAP_TEXT static int
4774 pmap_remove_options_internal(
4775 pmap_t pmap,
4776 vm_map_address_t start,
4777 vm_map_address_t end,
4778 int options)
4779 {
4780 int remove_count = 0;
4781 pt_entry_t *bpte, *epte;
4782 pt_entry_t *pte_p;
4783 tt_entry_t *tte_p;
4784 uint32_t rmv_spte = 0;
4785
4786 if (__improbable(end < start)) {
4787 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
4788 }
4789
4790 VALIDATE_PMAP(pmap);
4791 PMAP_LOCK(pmap);
4792
4793 tte_p = pmap_tte(pmap, start);
4794
4795 if (tte_p == (tt_entry_t *) NULL) {
4796 goto done;
4797 }
4798
4799 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4800 pte_p = (pt_entry_t *) ttetokv(*tte_p);
4801 bpte = &pte_p[ptenum(start)];
4802 epte = bpte + ((end - start) >> ARM_TT_LEAF_SHIFT);
4803
4804 remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
4805 &rmv_spte, options);
4806
4807 #if (__ARM_VMSA__ == 7)
4808 if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
4809 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
4810 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
4811 flush_mmu_tlb_entry((start & ~ARM_TT_L1_OFFMASK) | (pmap->asid & 0xff));
4812 }
4813 #else
4814 if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
4815 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
4816 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
4817 flush_mmu_tlb_entry(tlbi_addr(start & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
4818 }
4819 #endif
4820 }
4821
4822 done:
4823 PMAP_UNLOCK(pmap);
4824 return remove_count;
4825 }
4826
4827 void
4828 pmap_remove_options(
4829 pmap_t pmap,
4830 vm_map_address_t start,
4831 vm_map_address_t end,
4832 int options)
4833 {
4834 int remove_count = 0;
4835 vm_map_address_t va;
4836
4837 if (pmap == PMAP_NULL) {
4838 return;
4839 }
4840
4841 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
4842 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
4843 VM_KERNEL_ADDRHIDE(end));
4844
4845 #if MACH_ASSERT
4846 if ((start | end) & PAGE_MASK) {
4847 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
4848 pmap, (uint64_t)start, (uint64_t)end);
4849 }
4850 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
4851 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
4852 pmap, (uint64_t)start, (uint64_t)end);
4853 }
4854 #endif
4855
4856 /*
4857 * Invalidate the translation buffer first
4858 */
4859 va = start;
4860 while (va < end) {
4861 vm_map_address_t l;
4862
4863 #if (__ARM_VMSA__ == 7)
4864 l = ((va + ARM_TT_L1_SIZE) & ~ARM_TT_L1_OFFMASK);
4865 #else
4866 l = ((va + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK);
4867 #endif
4868 if (l > end) {
4869 l = end;
4870 }
4871
4872 remove_count += pmap_remove_options_internal(pmap, va, l, options);
4873
4874 va = l;
4875 }
4876
4877 if (remove_count > 0) {
4878 PMAP_UPDATE_TLBS(pmap, start, end);
4879 }
4880
4881 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
4882 }
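/*
 * Worked example of the chunking above (values assume an arm64
 * configuration with a 4KB translation granule, ARM_TT_L2_SIZE == 0x200000):
 * removing [0x1f0000, 0x410000) is split at 2MB boundaries so that each
 * pmap_remove_options_internal() call stays within one leaf page table:
 *
 *	[0x1f0000, 0x200000)
 *	[0x200000, 0x400000)
 *	[0x400000, 0x410000)
 */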
4883
4884
4885 /*
4886 * Remove phys addr if mapped in specified map
4887 */
4888 void
4889 pmap_remove_some_phys(
4890 __unused pmap_t map,
4891 __unused ppnum_t pn)
4892 {
4893 /* Implement to support working set code */
4894 }
4895
4896 void
4897 pmap_set_pmap(
4898 pmap_t pmap,
4899 #if !__ARM_USER_PROTECT__
4900 __unused
4901 #endif
4902 thread_t thread)
4903 {
4904 pmap_switch(pmap);
4905 #if __ARM_USER_PROTECT__
4906 if (pmap->tte_index_max == NTTES) {
4907 thread->machine.uptw_ttc = 2;
4908 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
4909 } else {
4910 thread->machine.uptw_ttc = 1;
4911 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
4912 }
4913 thread->machine.asid = pmap->asid;
4914 #endif
4915 }
4916
4917 static void
4918 pmap_flush_core_tlb_asid(pmap_t pmap)
4919 {
4920 #if (__ARM_VMSA__ == 7)
4921 flush_core_tlb_asid(pmap->asid);
4922 #else
4923 flush_core_tlb_asid(((uint64_t) pmap->asid) << TLBI_ASID_SHIFT);
4924 #endif
4925 }
4926
4927 MARK_AS_PMAP_TEXT static void
4928 pmap_switch_internal(
4929 pmap_t pmap)
4930 {
4931 VALIDATE_PMAP(pmap);
4932 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
4933 uint32_t last_asid_high_bits, asid_high_bits;
4934 boolean_t do_asid_flush = FALSE;
4935
4936 #if (__ARM_VMSA__ == 7)
4937 if (not_in_kdp) {
4938 pmap_simple_lock(&pmap->tt1_lock);
4939 }
4940 #else
4941 pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
4942 #endif
4943
4944 /* Paranoia. */
4945 assert(pmap->asid < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));
4946
4947 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
4948 asid_high_bits = pmap->vasid >> ARM_ASID_SHIFT;
4949 last_asid_high_bits = (uint32_t) cpu_data_ptr->cpu_asid_high_bits[pmap->asid];
4950
4951 if (asid_high_bits != last_asid_high_bits) {
4952 /*
4953 * If the virtual ASID of the new pmap does not match the virtual ASID
4954 * last seen on this CPU for the physical ASID (that was a mouthful),
4955 * then this switch runs the risk of aliasing. We need to flush the
4956 * TLB for this physical ASID in this case.
4957 */
4958 cpu_data_ptr->cpu_asid_high_bits[pmap->asid] = (uint8_t) asid_high_bits;
4959 do_asid_flush = TRUE;
4960 }
4961
4962 pmap_switch_user_ttb_internal(pmap);
4963
4964 #if (__ARM_VMSA__ > 7)
4965 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
4966 * to flush the userspace mappings for that region. Those mappings are global
4967 * and will not be protected by the ASID. It should also be cheaper to flush the
4968 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
4969 if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
4970 flush_core_tlb();
4971 } else
4972 #endif
4973 if (do_asid_flush) {
4974 pmap_flush_core_tlb_asid(pmap);
4975 }
4976
4977 #if (__ARM_VMSA__ == 7)
4978 if (not_in_kdp) {
4979 pmap_simple_unlock(&pmap->tt1_lock);
4980 }
4981 #endif
4982 }
4983
4984 void
4985 pmap_switch(
4986 pmap_t pmap)
4987 {
4988 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
4989 pmap_switch_internal(pmap);
4990 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
4991 }
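/*
 * Worked example of the ASID check in pmap_switch_internal() (assuming
 * 8-bit hardware ASIDs, i.e. ARM_ASID_SHIFT == 8): a pmap with vasid 0x305
 * carries "generation" 0x3. If cpu_asid_high_bits[] last recorded
 * generation 0x2 for that pmap's hardware ASID slot, the slot may still
 * hold another pmap's translations, so do_asid_flush is set and the slot
 * is flushed before use.
 */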
4992
4993 void
4994 pmap_page_protect(
4995 ppnum_t ppnum,
4996 vm_prot_t prot)
4997 {
4998 pmap_page_protect_options(ppnum, prot, 0, NULL);
4999 }
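/*
 * Minimal usage sketch (hypothetical page number): per the option handling
 * in pmap_page_protect_options_internal() below, read-style protections
 * downgrade every mapping of the page, while anything less removes them:
 *
 *	pmap_page_protect(pn, VM_PROT_READ);	// strip write access everywhere
 *	pmap_page_protect(pn, VM_PROT_NONE);	// remove all mappings of the page
 */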
5000
5001 /*
5002 * Routine: pmap_page_protect_options
5003 *
5004 * Function:
5005 * Lower the permissions for all mappings of a given
5006 * page.
5007 */
5008 MARK_AS_PMAP_TEXT static void
5009 pmap_page_protect_options_internal(
5010 ppnum_t ppnum,
5011 vm_prot_t prot,
5012 unsigned int options)
5013 {
5014 pmap_paddr_t phys = ptoa(ppnum);
5015 pv_entry_t **pv_h;
5016 pv_entry_t **pve_pp;
5017 pv_entry_t *pve_p;
5018 pv_entry_t *pveh_p;
5019 pv_entry_t *pvet_p;
5020 pt_entry_t *pte_p;
5021 pv_entry_t *new_pve_p;
5022 pt_entry_t *new_pte_p;
5023 vm_offset_t pvh_flags;
5024 int pai;
5025 boolean_t remove;
5026 boolean_t set_NX;
5027 boolean_t tlb_flush_needed = FALSE;
5028 unsigned int pvh_cnt = 0;
5029
5030 assert(ppnum != vm_page_fictitious_addr);
5031
5032 /* Only work with managed pages. */
5033 if (!pa_valid(phys)) {
5034 return;
5035 }
5036
5037 /*
5038 * Determine the new protection.
5039 */
5040 switch (prot) {
5041 case VM_PROT_ALL:
5042 return; /* nothing to do */
5043 case VM_PROT_READ:
5044 case VM_PROT_READ | VM_PROT_EXECUTE:
5045 remove = FALSE;
5046 break;
5047 default:
5048 remove = TRUE;
5049 break;
5050 }
5051
5052 pai = (int)pa_index(phys);
5053 LOCK_PVH(pai);
5054 pv_h = pai_to_pvh(pai);
5055 pvh_flags = pvh_get_flags(pv_h);
5056
5057
5058 pte_p = PT_ENTRY_NULL;
5059 pve_p = PV_ENTRY_NULL;
5060 pve_pp = pv_h;
5061 pveh_p = PV_ENTRY_NULL;
5062 pvet_p = PV_ENTRY_NULL;
5063 new_pve_p = PV_ENTRY_NULL;
5064 new_pte_p = PT_ENTRY_NULL;
5065 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
5066 pte_p = pvh_ptep(pv_h);
5067 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5068 pve_p = pvh_list(pv_h);
5069 pveh_p = pve_p;
5070 }
5071
5072 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
5073 vm_map_address_t va;
5074 pmap_t pmap;
5075 pt_entry_t tmplate;
5076 boolean_t update = FALSE;
5077
5078 if (pve_p != PV_ENTRY_NULL) {
5079 pte_p = pve_get_ptep(pve_p);
5080 }
5081
5082 #ifdef PVH_FLAG_IOMMU
5083 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
5084 if (remove) {
5085 if (options & PMAP_OPTIONS_COMPRESSOR) {
5086 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
5087 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
5088 }
5089 if (pve_p != PV_ENTRY_NULL) {
5090 pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
5091 pvh_remove(pv_h, pve_pp, pve_p);
5092 pveh_p = pvh_list(pv_h);
5093 pve_next(pve_p) = new_pve_p;
5094 new_pve_p = pve_p;
5095 pve_p = temp_pve_p;
5096 continue;
5097 } else {
5098 new_pte_p = pte_p;
5099 break;
5100 }
5101 }
5102 goto protect_skip_pve;
5103 }
5104 #endif
5105 pmap = ptep_get_pmap(pte_p);
5106 va = ptep_get_va(pte_p);
5107
5108 if (pte_p == PT_ENTRY_NULL) {
5109 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
5110 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
5111 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
5112 #if MACH_ASSERT
5113 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
5114 pv_entry_t *check_pve_p = pveh_p;
5115 while (check_pve_p != PV_ENTRY_NULL) {
5116 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
5117 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
5118 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
5119 }
5120 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
5121 }
5122 }
5123 #endif
5124 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
5125 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
5126 }
5127
5128 #if DEVELOPMENT || DEBUG
5129 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5130 #else
5131 if ((prot & VM_PROT_EXECUTE))
5132 #endif
5133 { set_NX = FALSE; } else {
5134 set_NX = TRUE;
5135 }
5136
5137 /* Remove the mapping if new protection is NONE */
5138 if (remove) {
5139 boolean_t is_altacct = FALSE;
5140
5141 if (IS_ALTACCT_PAGE(pai, pve_p)) {
5142 is_altacct = TRUE;
5143 } else {
5144 is_altacct = FALSE;
5145 }
5146
5147 if (pte_is_wired(*pte_p)) {
5148 pte_set_wired(pte_p, 0);
5149 if (pmap != kernel_pmap) {
5150 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5151 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
5152 }
5153 }
5154
5155 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5156 pmap != kernel_pmap &&
5157 (options & PMAP_OPTIONS_COMPRESSOR) &&
5158 IS_INTERNAL_PAGE(pai)) {
5159 assert(!ARM_PTE_IS_COMPRESSED(*pte_p));
5160 /* mark this PTE as having been "compressed" */
5161 tmplate = ARM_PTE_COMPRESSED;
5162 if (is_altacct) {
5163 tmplate |= ARM_PTE_COMPRESSED_ALT;
5164 is_altacct = TRUE;
5165 }
5166 } else {
5167 tmplate = ARM_PTE_TYPE_FAULT;
5168 }
5169
5170 if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
5171 tmplate == ARM_PTE_TYPE_FAULT &&
5172 (pmap != kernel_pmap)) {
5173 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
5174 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
5175 }
5176 }
5177
5178 if (*pte_p != tmplate) {
5179 WRITE_PTE_STRONG(pte_p, tmplate);
5180 update = TRUE;
5181 }
5182 pvh_cnt++;
5183 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
5184 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
5185
5186 #if MACH_ASSERT
5187 /*
5188 * We only ever compress internal pages.
5189 */
5190 if (options & PMAP_OPTIONS_COMPRESSOR) {
5191 assert(IS_INTERNAL_PAGE(pai));
5192 }
5193 #endif
5194
5195 if (pmap != kernel_pmap) {
5196 if (IS_REUSABLE_PAGE(pai) &&
5197 IS_INTERNAL_PAGE(pai) &&
5198 !is_altacct) {
5199 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
5200 OSAddAtomic(-1, &pmap->stats.reusable);
5201 } else if (IS_INTERNAL_PAGE(pai)) {
5202 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
5203 OSAddAtomic(-1, &pmap->stats.internal);
5204 } else {
5205 PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
5206 OSAddAtomic(-1, &pmap->stats.external);
5207 }
5208 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
5209 IS_INTERNAL_PAGE(pai)) {
5210 /* adjust "compressed" stats */
5211 OSAddAtomic64(+1, &pmap->stats.compressed);
5212 PMAP_STATS_PEAK(pmap->stats.compressed);
5213 pmap->stats.compressed_lifetime++;
5214 }
5215
5216 if (IS_ALTACCT_PAGE(pai, pve_p)) {
5217 assert(IS_INTERNAL_PAGE(pai));
5218 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
5219 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
5220 if (options & PMAP_OPTIONS_COMPRESSOR) {
5221 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5222 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
5223 }
5224
5225 /*
5226 * Cleanup our marker before
5227 * we free this pv_entry.
5228 */
5229 CLR_ALTACCT_PAGE(pai, pve_p);
5230 } else if (IS_REUSABLE_PAGE(pai)) {
5231 assert(IS_INTERNAL_PAGE(pai));
5232 if (options & PMAP_OPTIONS_COMPRESSOR) {
5233 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5234 /* was not in footprint, but is now */
5235 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5236 }
5237 } else if (IS_INTERNAL_PAGE(pai)) {
5238 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
5239
5240 /*
5241 * Update all stats related to physical footprint, which only
5242 * deals with internal pages.
5243 */
5244 if (options & PMAP_OPTIONS_COMPRESSOR) {
5245 /*
5246 * This removal is only being done so we can send this page to
5247 * the compressor; therefore it mustn't affect total task footprint.
5248 */
5249 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5250 } else {
5251 /*
5252 * This internal page isn't going to the compressor, so adjust stats to keep
5253 * phys_footprint up to date.
5254 */
5255 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5256 }
5257 } else {
5258 /* external page: no impact on ledgers */
5259 }
5260 }
5261
5262 if (pve_p != PV_ENTRY_NULL) {
5263 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
5264 }
5265 } else {
5266 pt_entry_t spte;
5267
5268 spte = *pte_p;
5269
5270 if (pmap == kernel_pmap) {
5271 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5272 } else {
5273 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
5274 }
5275
5276 pte_set_was_writeable(tmplate, false);
5277
5278 #if (__ARM_VMSA__ == 7)
5279 if (set_NX) {
5280 tmplate |= ARM_PTE_NX;
5281 } else {
5282 /*
5283 * While the naive implementation of this would serve to add execute
5284 * permission, this is not how the VM uses this interface, or how
5285 * x86_64 implements it. So ignore requests to add execute permissions.
5286 */
5287 #if 0
5288 tmplate &= ~ARM_PTE_NX;
5289 #else
5290 ;
5291 #endif
5292 }
5293 #else
5294 if (set_NX) {
5295 tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
5296 } else {
5297 /*
5298 * While the naive implementation of this would serve to add execute
5299 * permission, this is not how the VM uses this interface, or how
5300 * x86_64 implements it. So ignore requests to add execute permissions.
5301 */
5302 #if 0
5303 if (pmap == kernel_pmap) {
5304 tmplate &= ~ARM_PTE_PNX;
5305 tmplate |= ARM_PTE_NX;
5306 } else {
5307 tmplate &= ~ARM_PTE_NX;
5308 tmplate |= ARM_PTE_PNX;
5309 }
5310 #else
5311 ;
5312 #endif
5313 }
5314 #endif
5315
5316
5317 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5318 !ARM_PTE_IS_COMPRESSED(*pte_p) &&
5319 *pte_p != tmplate) {
5320 WRITE_PTE_STRONG(pte_p, tmplate);
5321 update = TRUE;
5322 }
5323 }
5324
5325 /* Invalidate TLBs for all CPUs using it */
5326 if (update) {
5327 tlb_flush_needed = TRUE;
5328 flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
5329 }
5330
5331 #ifdef PVH_FLAG_IOMMU
5332 protect_skip_pve:
5333 #endif
5334 pte_p = PT_ENTRY_NULL;
5335 pvet_p = pve_p;
5336 if (pve_p != PV_ENTRY_NULL) {
5337 if (remove) {
5338 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
5339 }
5340 pve_pp = pve_link_field(pve_p);
5341 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
5342 }
5343 }
5344
5345 #ifdef PVH_FLAG_EXEC
5346 if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
5347 pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
5348 }
5349 #endif
5350 if (tlb_flush_needed) {
5351 sync_tlb_flush();
5352 }
5353
5354 /* if we removed a bunch of entries, take care of them now */
5355 if (remove) {
5356 if (new_pve_p != PV_ENTRY_NULL) {
5357 pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
5358 pvh_set_flags(pv_h, pvh_flags);
5359 } else if (new_pte_p != PT_ENTRY_NULL) {
5360 pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
5361 pvh_set_flags(pv_h, pvh_flags);
5362 } else {
5363 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
5364 }
5365 }
5366
5367 UNLOCK_PVH(pai);
5368
5369 if (remove && (pvet_p != PV_ENTRY_NULL)) {
5370 pv_list_free(pveh_p, pvet_p, pvh_cnt);
5371 }
5372 }
5373
5374 void
5375 pmap_page_protect_options(
5376 ppnum_t ppnum,
5377 vm_prot_t prot,
5378 unsigned int options,
5379 __unused void *arg)
5380 {
5381 pmap_paddr_t phys = ptoa(ppnum);
5382
5383 assert(ppnum != vm_page_fictitious_addr);
5384
5385 /* Only work with managed pages. */
5386 if (!pa_valid(phys)) {
5387 return;
5388 }
5389
5390 /*
5391 * Determine the new protection.
5392 */
5393 if (prot == VM_PROT_ALL) {
5394 return; /* nothing to do */
5395 }
5396
5397 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
5398
5399 pmap_page_protect_options_internal(ppnum, prot, options);
5400
5401 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
5402 }
5403
5404 /*
5405 * Indicates if the pmap layer enforces some additional restrictions on the
5406 * given set of protections.
5407 */
5408 bool
5409 pmap_has_prot_policy(__unused vm_prot_t prot)
5410 {
5411 return FALSE;
5412 }
5413
5414 /*
5415 * Set the physical protection on the
5416 * specified range of this map as requested.
5417 * VERY IMPORTANT: Will not increase permissions.
5418 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
5419 */
5420 void
5421 pmap_protect(
5422 pmap_t pmap,
5423 vm_map_address_t b,
5424 vm_map_address_t e,
5425 vm_prot_t prot)
5426 {
5427 pmap_protect_options(pmap, b, e, prot, 0, NULL);
5428 }
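/*
 * Minimal usage sketch (hypothetical range): because this path only lowers
 * access, making a page-aligned range read-only looks like the call below;
 * requests that would drop all access are instead routed to
 * pmap_remove_options() by pmap_protect_options().
 *
 *	pmap_protect(map, start, end, VM_PROT_READ | VM_PROT_EXECUTE);
 */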
5429
5430 MARK_AS_PMAP_TEXT static void
5431 pmap_protect_options_internal(
5432 pmap_t pmap,
5433 vm_map_address_t start,
5434 vm_map_address_t end,
5435 vm_prot_t prot,
5436 unsigned int options,
5437 __unused void *args)
5438 {
5439 tt_entry_t *tte_p;
5440 pt_entry_t *bpte_p, *epte_p;
5441 pt_entry_t *pte_p;
5442 boolean_t set_NX = TRUE;
5443 #if (__ARM_VMSA__ > 7)
5444 boolean_t set_XO = FALSE;
5445 #endif
5446 boolean_t should_have_removed = FALSE;
5447
5448 #ifndef __ARM_IC_NOALIAS_ICACHE__
5449 boolean_t InvalidatePoU_Icache_Done = FALSE;
5450 #endif
5451
5452 if (__improbable(end < start)) {
5453 panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
5454 }
5455
5456 #if DEVELOPMENT || DEBUG
5457 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5458 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5459 should_have_removed = TRUE;
5460 }
5461 } else
5462 #endif
5463 {
5464 /* Determine the new protection. */
5465 switch (prot) {
5466 #if (__ARM_VMSA__ > 7)
5467 case VM_PROT_EXECUTE:
5468 set_XO = TRUE;
5469 /* fall through */
5470 #endif
5471 case VM_PROT_READ:
5472 case VM_PROT_READ | VM_PROT_EXECUTE:
5473 break;
5474 case VM_PROT_READ | VM_PROT_WRITE:
5475 case VM_PROT_ALL:
5476 return; /* nothing to do */
5477 default:
5478 should_have_removed = TRUE;
5479 }
5480 }
5481
5482 if (should_have_removed) {
5483 panic("%s: should have been a remove operation, "
5484 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
5485 __FUNCTION__,
5486 pmap, (void *)start, (void *)end, prot, options, args);
5487 }
5488
5489 #if DEVELOPMENT || DEBUG
5490 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5491 #else
5492 if ((prot & VM_PROT_EXECUTE))
5493 #endif
5494 {
5495 set_NX = FALSE;
5496 } else {
5497 set_NX = TRUE;
5498 }
5499
5500 VALIDATE_PMAP(pmap);
5501 PMAP_LOCK(pmap);
5502 tte_p = pmap_tte(pmap, start);
5503
5504 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5505 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
5506 bpte_p = &bpte_p[ptenum(start)];
5507 epte_p = bpte_p + arm_atop(end - start);
5508 pte_p = bpte_p;
5509
5510 for (pte_p = bpte_p;
5511 pte_p < epte_p;
5512 pte_p += PAGE_SIZE / ARM_PGBYTES) {
5513 pt_entry_t spte;
5514 #if DEVELOPMENT || DEBUG
5515 boolean_t force_write = FALSE;
5516 #endif
5517
5518 spte = *pte_p;
5519
5520 if ((spte == ARM_PTE_TYPE_FAULT) ||
5521 ARM_PTE_IS_COMPRESSED(spte)) {
5522 continue;
5523 }
5524
5525 pmap_paddr_t pa;
5526 int pai = 0;
5527 boolean_t managed = FALSE;
5528
5529 while (!managed) {
5530 /*
5531 * It may be possible for the pte to transition from managed
5532 * to unmanaged in this timeframe; for now, elide the assert.
5533 * We should break out as a consequence of checking pa_valid.
5534 */
5535 // assert(!ARM_PTE_IS_COMPRESSED(spte));
5536 pa = pte_to_pa(spte);
5537 if (!pa_valid(pa)) {
5538 break;
5539 }
5540 pai = (int)pa_index(pa);
5541 LOCK_PVH(pai);
5542 spte = *pte_p;
5543 pa = pte_to_pa(spte);
5544 if (pai == (int)pa_index(pa)) {
5545 managed = TRUE;
5546 break; // Leave the PVH locked as we will unlock it after we free the PTE
5547 }
5548 UNLOCK_PVH(pai);
5549 }
5550
5551 if ((spte == ARM_PTE_TYPE_FAULT) ||
5552 ARM_PTE_IS_COMPRESSED(spte)) {
5553 continue;
5554 }
5555
5556 pt_entry_t tmplate;
5557
5558 if (pmap == kernel_pmap) {
5559 #if DEVELOPMENT || DEBUG
5560 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5561 force_write = TRUE;
5562 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
5563 } else
5564 #endif
5565 {
5566 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5567 }
5568 } else {
5569 #if DEVELOPMENT || DEBUG
5570 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5571 force_write = TRUE;
5572 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWRW));
5573 } else
5574 #endif
5575 {
5576 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
5577 }
5578 }
5579
5580 /*
5581 * XXX Removing "NX" would
5582 * grant "execute" access
5583 * immediately, bypassing any
5584 * checks VM might want to do
5585 * in its soft fault path.
5586 * pmap_protect() and co. are
5587 * not allowed to increase
5588 * access permissions.
5589 */
5590 #if (__ARM_VMSA__ == 7)
5591 if (set_NX) {
5592 tmplate |= ARM_PTE_NX;
5593 } else {
5594 /* do NOT clear "NX"! */
5595 }
5596 #else
5597 if (set_NX) {
5598 tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
5599 } else {
5600 if (pmap == kernel_pmap) {
5601 /*
5602 * TODO: Run CS/Monitor checks here;
5603 * should we be clearing PNX here? Is
5604 * this just for dtrace?
5605 */
5606 tmplate &= ~ARM_PTE_PNX;
5607 tmplate |= ARM_PTE_NX;
5608 } else {
5609 /* do NOT clear "NX"! */
5610 tmplate |= ARM_PTE_PNX;
5611 if (set_XO) {
5612 tmplate &= ~ARM_PTE_APMASK;
5613 tmplate |= ARM_PTE_AP(AP_RONA);
5614 }
5615 }
5616 }
5617 #endif
5618
5619 #if DEVELOPMENT || DEBUG
5620 if (force_write) {
5621 /*
5622 * TODO: Run CS/Monitor checks here.
5623 */
5624 if (managed) {
5625 /*
5626 * We are marking the page as writable,
5627 * so we consider it to be modified and
5628 * referenced.
5629 */
5630 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
5631 tmplate |= ARM_PTE_AF;
5632
5633 if (IS_REFFAULT_PAGE(pai)) {
5634 CLR_REFFAULT_PAGE(pai);
5635 }
5636
5637 if (IS_MODFAULT_PAGE(pai)) {
5638 CLR_MODFAULT_PAGE(pai);
5639 }
5640 }
5641 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5642 /*
5643 * An immediate request for anything other than
5644 * write should still mark the page as
5645 * referenced if managed.
5646 */
5647 if (managed) {
5648 pa_set_bits(pa, PP_ATTR_REFERENCED);
5649 tmplate |= ARM_PTE_AF;
5650
5651 if (IS_REFFAULT_PAGE(pai)) {
5652 CLR_REFFAULT_PAGE(pai);
5653 }
5654 }
5655 }
5656 #endif
5657
5658 /* We do not expect to write fast fault the entry. */
5659 pte_set_was_writeable(tmplate, false);
5660
5661 /* TODO: Doesn't this need to worry about PNX? */
5662 if (((spte & ARM_PTE_NX) == ARM_PTE_NX) && (prot & VM_PROT_EXECUTE)) {
5663 CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
5664 #ifdef __ARM_IC_NOALIAS_ICACHE__
5665 InvalidatePoU_IcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
5666 #else
5667 if (!InvalidatePoU_Icache_Done) {
5668 InvalidatePoU_Icache();
5669 InvalidatePoU_Icache_Done = TRUE;
5670 }
5671 #endif
5672 }
5673
5674 WRITE_PTE_FAST(pte_p, tmplate);
5675
5676 if (managed) {
5677 ASSERT_PVH_LOCKED(pai);
5678 UNLOCK_PVH(pai);
5679 }
5680 }
5681
5682 FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
5683 PMAP_UPDATE_TLBS(pmap, start, end);
5684 }
5685
5686 PMAP_UNLOCK(pmap);
5687 }
5688
5689 void
5690 pmap_protect_options(
5691 pmap_t pmap,
5692 vm_map_address_t b,
5693 vm_map_address_t e,
5694 vm_prot_t prot,
5695 unsigned int options,
5696 __unused void *args)
5697 {
5698 vm_map_address_t l, beg;
5699
5700 if ((b | e) & PAGE_MASK) {
5701 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
5702 pmap, (uint64_t)b, (uint64_t)e);
5703 }
5704
5705 #if DEVELOPMENT || DEBUG
5706 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5707 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5708 pmap_remove_options(pmap, b, e, options);
5709 return;
5710 }
5711 } else
5712 #endif
5713 {
5714 /* Determine the new protection. */
5715 switch (prot) {
5716 case VM_PROT_EXECUTE:
5717 case VM_PROT_READ:
5718 case VM_PROT_READ | VM_PROT_EXECUTE:
5719 break;
5720 case VM_PROT_READ | VM_PROT_WRITE:
5721 case VM_PROT_ALL:
5722 return; /* nothing to do */
5723 default:
5724 pmap_remove_options(pmap, b, e, options);
5725 return;
5726 }
5727 }
5728
5729 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
5730 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
5731 VM_KERNEL_ADDRHIDE(e));
5732
5733 beg = b;
5734
5735 while (beg < e) {
5736 l = ((beg + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);
5737
5738 if (l > e) {
5739 l = e;
5740 }
5741
5742 pmap_protect_options_internal(pmap, beg, l, prot, options, args);
5743
5744 beg = l;
5745 }
5746
5747 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
5748 }
5749
5750 /* Map a (possibly) auto-generated block */
5751 kern_return_t
5752 pmap_map_block(
5753 pmap_t pmap,
5754 addr64_t va,
5755 ppnum_t pa,
5756 uint32_t size,
5757 vm_prot_t prot,
5758 int attr,
5759 __unused unsigned int flags)
5760 {
5761 kern_return_t kr;
5762 addr64_t original_va = va;
5763 uint32_t page;
5764
5765 for (page = 0; page < size; page++) {
5766 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
5767
5768 if (kr != KERN_SUCCESS) {
5769 /*
5770 * This will panic for now, as it is unclear that
5771 * removing the mappings is correct.
5772 */
5773 panic("%s: failed pmap_enter, "
5774 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
5775 __FUNCTION__,
5776 pmap, va, pa, size, prot, flags);
5777
5778 pmap_remove(pmap, original_va, va - original_va);
5779 return kr;
5780 }
5781
5782 va += PAGE_SIZE;
5783 pa++;
5784 }
5785
5786 return KERN_SUCCESS;
5787 }
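/*
 * Minimal usage sketch (hypothetical values): the block mapper above is a
 * page-by-page loop over pmap_enter() that always wires the mappings, so
 * mapping a 16-page physically contiguous buffer cacheably would be:
 *
 *	kr = pmap_map_block(map, va, first_ppnum, 16,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_COPYBACK, 0);
 */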
5788
5789 /*
5790 * Insert the given physical page (p) at
5791 * the specified virtual address (v) in the
5792 * target physical map with the protection requested.
5793 *
5794 * If specified, the page will be wired down, meaning
5795 * that the related pte can not be reclaimed.
5796 *
5797 * NB: This is the only routine which MAY NOT lazy-evaluate
5798 * or lose information. That is, this routine must actually
5799 * insert this page into the given map eventually (it must make
5800 * forward progress eventually).
5801 */
5802 kern_return_t
5803 pmap_enter(
5804 pmap_t pmap,
5805 vm_map_address_t v,
5806 ppnum_t pn,
5807 vm_prot_t prot,
5808 vm_prot_t fault_type,
5809 unsigned int flags,
5810 boolean_t wired)
5811 {
5812 return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
5813 }
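/*
 * Minimal usage sketch (hypothetical values): the flags argument carries
 * VM_WIMG_* cache attribute bits, and the final argument requests a wired
 * mapping:
 *
 *	kr = pmap_enter(kernel_pmap, va, pn,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
 *	    VM_WIMG_COPYBACK, TRUE);
 */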
5814
5815
5816 static inline void
5817 pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
5818 {
5819 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
5820 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
5821 if (pte & ARM_PTE_WIRED) {
5822 OSAddAtomic16(1, ptd_wiredcnt_ptr);
5823 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5824 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
5825 } else {
5826 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
5827 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5828 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
5829 }
5830 }
5831 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5832 !ARM_PTE_IS_COMPRESSED(*pte_p)) {
5833 WRITE_PTE_STRONG(pte_p, pte);
5834 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
5835 } else {
5836 WRITE_PTE(pte_p, pte);
5837 __builtin_arm_isb(ISB_SY);
5838 }
5839
5840 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
5841 }
5842
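/*
 * Translate a VM_WIMG_* cacheability code into the corresponding PTE
 * memory-attribute index and shareability bits.  Device, posted and
 * write-combined mappings are additionally marked non-executable.
 */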
5843 static pt_entry_t
5844 wimg_to_pte(unsigned int wimg)
5845 {
5846 pt_entry_t pte;
5847
5848 switch (wimg & (VM_WIMG_MASK)) {
5849 case VM_WIMG_IO:
5850 case VM_WIMG_RT:
5851 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5852 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5853 break;
5854 case VM_WIMG_POSTED:
5855 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
5856 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5857 break;
5858 case VM_WIMG_WCOMB:
5859 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
5860 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5861 break;
5862 case VM_WIMG_WTHRU:
5863 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
5864 #if (__ARM_VMSA__ > 7)
5865 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5866 #else
5867 pte |= ARM_PTE_SH;
5868 #endif
5869 break;
5870 case VM_WIMG_COPYBACK:
5871 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
5872 #if (__ARM_VMSA__ > 7)
5873 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5874 #else
5875 pte |= ARM_PTE_SH;
5876 #endif
5877 break;
5878 case VM_WIMG_INNERWBACK:
5879 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
5880 #if (__ARM_VMSA__ > 7)
5881 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
5882 #else
5883 pte |= ARM_PTE_SH;
5884 #endif
5885 break;
5886 default:
5887 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
5888 #if (__ARM_VMSA__ > 7)
5889 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5890 #else
5891 pte |= ARM_PTE_SH;
5892 #endif
5893 }
5894
5895 return pte;
5896 }
5897
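/*
 * Record a CPU mapping of the physical page with pv head index 'pai'.  The
 * first mapping is stored directly in the pv head (PVH_TYPE_PTEP); adding a
 * second converts the head to a pv_entry list (PVH_TYPE_PVEP).  The first
 * CPU mapping also establishes the page's internal/reusable/altacct
 * accounting state.  Returns FALSE when a pv_entry allocation forces the
 * caller to retry (the PVH lock may have been dropped).
 */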
5898 static boolean_t
5899 pmap_enter_pv(
5900 pmap_t pmap,
5901 pt_entry_t *pte_p,
5902 int pai,
5903 unsigned int options,
5904 pv_entry_t **pve_p,
5905 boolean_t *is_altacct)
5906 {
5907 pv_entry_t **pv_h;
5908 pv_h = pai_to_pvh(pai);
5909 boolean_t first_cpu_mapping;
5910
5911 ASSERT_PVH_LOCKED(pai);
5912
5913 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
5914
5915
5916 #ifdef PVH_FLAG_CPU
5917 /* An IOMMU mapping may already be present for a page that hasn't yet
5918 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
5919 * if this is the first CPU mapping. We base internal/reusable
5920 * accounting on the options specified for the first CPU mapping.
5921 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
5922 * as there are *any* mappings of the page. The accounting for a
5923 * page should not need to change until the page is recycled by the
5924 * VM layer, and we assert that there are no mappings when a page
5925 * is recycled. An IOMMU mapping of a freed/recycled page is
5926 * considered a security violation & potential DMA corruption path.*/
5927 first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
5928 if (first_cpu_mapping) {
5929 pvh_flags |= PVH_FLAG_CPU;
5930 }
5931 #else
5932 first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
5933 #endif
5934
5935 if (first_cpu_mapping) {
5936 if (options & PMAP_OPTIONS_INTERNAL) {
5937 SET_INTERNAL_PAGE(pai);
5938 } else {
5939 CLR_INTERNAL_PAGE(pai);
5940 }
5941 if ((options & PMAP_OPTIONS_INTERNAL) &&
5942 (options & PMAP_OPTIONS_REUSABLE)) {
5943 SET_REUSABLE_PAGE(pai);
5944 } else {
5945 CLR_REUSABLE_PAGE(pai);
5946 }
5947 }
5948 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
5949 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
5950 if (pmap != NULL && pmap != kernel_pmap &&
5951 ((options & PMAP_OPTIONS_ALT_ACCT) ||
5952 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
5953 IS_INTERNAL_PAGE(pai)) {
5954 /*
5955 * Make a note to ourselves that this mapping is using alternative
5956 * accounting. We'll need this in order to know which ledger to
5957 * debit when the mapping is removed.
5958 *
5959 * The altacct bit must be set while the pv head is locked. Defer
5960 * the ledger accounting until after we've dropped the lock.
5961 */
5962 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5963 *is_altacct = TRUE;
5964 } else {
5965 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5966 }
5967 } else {
5968 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
5969 pt_entry_t *pte1_p;
5970
5971 /*
5972 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
5973 */
5974 pte1_p = pvh_ptep(pv_h);
5975 pvh_set_flags(pv_h, pvh_flags);
5976 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
5977 return FALSE;
5978 }
5979
5980 pve_set_ptep(*pve_p, pte1_p);
5981 (*pve_p)->pve_next = PV_ENTRY_NULL;
5982
5983 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
5984 /*
5985 * transfer "altacct" from
5986 * pp_attr to this pve
5987 */
5988 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5989 SET_ALTACCT_PAGE(pai, *pve_p);
5990 }
5991 pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
5992 *pve_p = PV_ENTRY_NULL;
5993 } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5994 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
5995 __func__, *pv_h, pte_p, pmap, pv_h);
5996 }
5997 /*
5998 * Set up pv_entry for this new mapping and then
5999 * add it to the list for this physical page.
6000 */
6001 pvh_set_flags(pv_h, pvh_flags);
6002 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
6003 return FALSE;
6004 }
6005
6006 pve_set_ptep(*pve_p, pte_p);
6007 (*pve_p)->pve_next = PV_ENTRY_NULL;
6008
6009 pvh_add(pv_h, *pve_p);
6010
6011 if (pmap != NULL && pmap != kernel_pmap &&
6012 ((options & PMAP_OPTIONS_ALT_ACCT) ||
6013 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
6014 IS_INTERNAL_PAGE(pai)) {
6015 /*
6016 * Make a note to ourselves that this
6017 * mapping is using alternative
6018 * accounting. We'll need this in order
6019 * to know which ledger to debit when
6020 * the mapping is removed.
6021 *
6022 * The altacct bit must be set while
6023 * the pv head is locked. Defer the
6024 * ledger accounting until after we've
6025 * dropped the lock.
6026 */
6027 SET_ALTACCT_PAGE(pai, *pve_p);
6028 *is_altacct = TRUE;
6029 }
6030
6031 *pve_p = PV_ENTRY_NULL;
6032 }
6033
6034 pvh_set_flags(pv_h, pvh_flags);
6035
6036 return TRUE;
6037 }
6038
6039 MARK_AS_PMAP_TEXT static kern_return_t
6040 pmap_enter_options_internal(
6041 pmap_t pmap,
6042 vm_map_address_t v,
6043 ppnum_t pn,
6044 vm_prot_t prot,
6045 vm_prot_t fault_type,
6046 unsigned int flags,
6047 boolean_t wired,
6048 unsigned int options)
6049 {
6050 pmap_paddr_t pa = ptoa(pn);
6051 pt_entry_t pte;
6052 pt_entry_t spte;
6053 pt_entry_t *pte_p;
6054 pv_entry_t *pve_p;
6055 boolean_t set_NX;
6056 boolean_t set_XO = FALSE;
6057 boolean_t refcnt_updated;
6058 boolean_t wiredcnt_updated;
6059 unsigned int wimg_bits;
6060 boolean_t was_compressed, was_alt_compressed;
6061 kern_return_t kr = KERN_SUCCESS;
6062
6063 VALIDATE_PMAP(pmap);
6064
6065 if ((v) & PAGE_MASK) {
6066 panic("pmap_enter_options() pmap %p v 0x%llx\n",
6067 pmap, (uint64_t)v);
6068 }
6069
6070 if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
6071 panic("pmap_enter_options(): WX request on kernel_pmap");
6072 }
6073
6074 #if DEVELOPMENT || DEBUG
6075 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
6076 #else
6077 if ((prot & VM_PROT_EXECUTE))
6078 #endif
6079 { set_NX = FALSE;} else {
6080 set_NX = TRUE;
6081 }
6082
6083 #if (__ARM_VMSA__ > 7)
6084 if (prot == VM_PROT_EXECUTE) {
6085 set_XO = TRUE;
6086 }
6087 #endif
6088
6089 assert(pn != vm_page_fictitious_addr);
6090
6091 refcnt_updated = FALSE;
6092 wiredcnt_updated = FALSE;
6093 pve_p = PV_ENTRY_NULL;
6094 was_compressed = FALSE;
6095 was_alt_compressed = FALSE;
6096
6097 PMAP_LOCK(pmap);
6098
6099 /*
6100 * Expand pmap to include this pte. Assume that
6101 * pmap is always expanded to include enough hardware
6102 * pages to map one VM page.
6103 */
6104 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
6105 /* Must unlock to expand the pmap. */
6106 PMAP_UNLOCK(pmap);
6107
6108 kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
6109
6110 if (kr != KERN_SUCCESS) {
6111 return kr;
6112 }
6113
6114 PMAP_LOCK(pmap);
6115 }
6116
6117 if (options & PMAP_OPTIONS_NOENTER) {
6118 PMAP_UNLOCK(pmap);
6119 return KERN_SUCCESS;
6120 }
6121
6122 Pmap_enter_retry:
6123
6124 spte = *pte_p;
6125
6126 if (ARM_PTE_IS_COMPRESSED(spte)) {
6127 /*
6128 * "pmap" should be locked at this point, so this should
6129 * not race with another pmap_enter() or pmap_remove_range().
6130 */
6131 assert(pmap != kernel_pmap);
6132
6133 /* one less "compressed" */
6134 OSAddAtomic64(-1, &pmap->stats.compressed);
6135 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
6136 PAGE_SIZE);
6137
6138 was_compressed = TRUE;
6139 if (spte & ARM_PTE_COMPRESSED_ALT) {
6140 was_alt_compressed = TRUE;
6141 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
6142 } else {
6143 /* was part of the footprint */
6144 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6145 }
6146
6147 /* clear "compressed" marker */
6148 /* XXX is it necessary, since we're about to overwrite it? */
6149 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
6150 spte = ARM_PTE_TYPE_FAULT;
6151
6152 /*
6153 * We're replacing a "compressed" marker with a valid PTE,
6154 * so no change for "refcnt".
6155 */
6156 refcnt_updated = TRUE;
6157 }
6158
6159 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
6160 pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
6161 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
6162 }
6163
6164 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
6165
6166 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6167 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6168 * to be wired in nearly all cases. For VM layer functionality, the wired
6169 * count in vm_page_t is sufficient. */
6170 if (wired && pmap != kernel_pmap) {
6171 pte |= ARM_PTE_WIRED;
6172 }
6173
6174 #if (__ARM_VMSA__ == 7)
6175 if (set_NX) {
6176 pte |= ARM_PTE_NX;
6177 }
6178 #else
6179 if (set_NX) {
6180 pte |= ARM_PTE_NX | ARM_PTE_PNX;
6181 } else {
6182 if (pmap == kernel_pmap) {
6183 pte |= ARM_PTE_NX;
6184 } else {
6185 pte |= ARM_PTE_PNX;
6186 }
6187 }
6188 #endif
6189
6190 if (pmap == kernel_pmap) {
6191 #if __ARM_KERNEL_PROTECT__
6192 pte |= ARM_PTE_NG;
6193 #endif /* __ARM_KERNEL_PROTECT__ */
6194 if (prot & VM_PROT_WRITE) {
6195 pte |= ARM_PTE_AP(AP_RWNA);
6196 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
6197 } else {
6198 pte |= ARM_PTE_AP(AP_RONA);
6199 pa_set_bits(pa, PP_ATTR_REFERENCED);
6200 }
6201 #if (__ARM_VMSA__ == 7)
6202 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
6203 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
6204 }
6205 #endif
6206 } else {
6207 if (!(pmap->nested)) {
6208 pte |= ARM_PTE_NG;
6209 } else if ((pmap->nested_region_asid_bitmap)
6210 && (v >= pmap->nested_region_subord_addr)
6211 && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
6212 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);
6213
6214 if ((pmap->nested_region_asid_bitmap)
6215 && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
6216 pte |= ARM_PTE_NG;
6217 }
6218 }
6219 #if MACH_ASSERT
6220 if (pmap->nested_pmap != NULL) {
6221 vm_map_address_t nest_vaddr;
6222 pt_entry_t *nest_pte_p;
6223
6224 nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;
6225
6226 if ((nest_vaddr >= pmap->nested_region_subord_addr)
6227 && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
6228 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
6229 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
6230 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p))
6231 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
6232 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);
6233
6234 if ((pmap->nested_pmap->nested_region_asid_bitmap)
6235 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
6236 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
6237 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
6238 }
6239 }
6240 }
6241 #endif
6242 if (prot & VM_PROT_WRITE) {
6243 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
6244 if (fault_type & VM_PROT_WRITE) {
6245 if (set_XO) {
6246 pte |= ARM_PTE_AP(AP_RWNA);
6247 } else {
6248 pte |= ARM_PTE_AP(AP_RWRW);
6249 }
6250 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
6251 } else {
6252 if (set_XO) {
6253 pte |= ARM_PTE_AP(AP_RONA);
6254 } else {
6255 pte |= ARM_PTE_AP(AP_RORO);
6256 }
6257 pa_set_bits(pa, PP_ATTR_REFERENCED);
6258 pte_set_was_writeable(pte, true);
6259 }
6260 } else {
6261 if (set_XO) {
6262 pte |= ARM_PTE_AP(AP_RWNA);
6263 } else {
6264 pte |= ARM_PTE_AP(AP_RWRW);
6265 }
6266 pa_set_bits(pa, PP_ATTR_REFERENCED);
6267 }
6268 } else {
6269 if (set_XO) {
6270 pte |= ARM_PTE_AP(AP_RONA);
6271 } else {
6272 pte |= ARM_PTE_AP(AP_RORO);
6273 }
6274 pa_set_bits(pa, PP_ATTR_REFERENCED);
6275 }
6276 }
6277
6278 pte |= ARM_PTE_AF;
6279
6280 volatile uint16_t *refcnt = NULL;
6281 volatile uint16_t *wiredcnt = NULL;
6282 if (pmap != kernel_pmap) {
6283 refcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
6284 wiredcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
6285 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
6286 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
6287 * a new PV entry. */
6288 if (!wiredcnt_updated) {
6289 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
6290 wiredcnt_updated = TRUE;
6291 }
6292 if (!refcnt_updated) {
6293 OSAddAtomic16(1, (volatile int16_t*)refcnt);
6294 refcnt_updated = TRUE;
6295 }
6296 }
6297
6298 if (pa_valid(pa)) {
6299 int pai;
6300 boolean_t is_altacct, is_internal;
6301
6302 is_internal = FALSE;
6303 is_altacct = FALSE;
6304
6305 pai = (int)pa_index(pa);
6306
6307 LOCK_PVH(pai);
6308
6309 Pmap_enter_loop:
6310 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6311 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6312 } else {
6313 wimg_bits = pmap_cache_attributes(pn);
6314 }
6315
6316 /* We may be retrying this operation after dropping the PVH lock.
6317 * Cache attributes for the physical page may have changed while the lock
6318 * was dropped, so clear any cache attributes we may have previously set
6319 * in the PTE template. */
6320 pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
6321 pte |= wimg_to_pte(wimg_bits);
6322
6323
6324
6325 if (pte == *pte_p) {
6326 /*
6327 * This pmap_enter operation has been completed by another thread;
6328 * undo the refcnt on the page table and return.
6329 */
6330 UNLOCK_PVH(pai);
6331 goto Pmap_enter_cleanup;
6332 } else if (pte_to_pa(*pte_p) == pa) {
6333 pmap_enter_pte(pmap, pte_p, pte, v);
6334 UNLOCK_PVH(pai);
6335 goto Pmap_enter_cleanup;
6336 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
6337 /*
6338 * The pte has been modified by another thread;
6339 * hold the refcnt on the page table and retry the pmap_enter operation.
6340 */
6341 UNLOCK_PVH(pai);
6342 goto Pmap_enter_retry;
6343 }
6344 if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
6345 goto Pmap_enter_loop;
6346 }
6347
6348 pmap_enter_pte(pmap, pte_p, pte, v);
6349
6350 if (pmap != kernel_pmap) {
6351 if (IS_REUSABLE_PAGE(pai) &&
6352 !is_altacct) {
6353 assert(IS_INTERNAL_PAGE(pai));
6354 OSAddAtomic(+1, &pmap->stats.reusable);
6355 PMAP_STATS_PEAK(pmap->stats.reusable);
6356 } else if (IS_INTERNAL_PAGE(pai)) {
6357 OSAddAtomic(+1, &pmap->stats.internal);
6358 PMAP_STATS_PEAK(pmap->stats.internal);
6359 is_internal = TRUE;
6360 } else {
6361 OSAddAtomic(+1, &pmap->stats.external);
6362 PMAP_STATS_PEAK(pmap->stats.external);
6363 }
6364 }
6365
6366 UNLOCK_PVH(pai);
6367
6368 if (pmap != kernel_pmap) {
6369 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
6370
6371 if (is_internal) {
6372 /*
6373 * Make corresponding adjustments to
6374 * phys_footprint statistics.
6375 */
6376 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
6377 if (is_altacct) {
6378 /*
6379 * If this page is internal and
6380 * in an IOKit region, credit
6381 * the task's total count of
6382 * dirty, internal IOKit pages.
6383 * It should *not* count towards
6384 * the task's total physical
6385 * memory footprint, because
6386 * this entire region was
6387 * already billed to the task
6388 * at the time the mapping was
6389 * created.
6390 *
6391 * Put another way, this is
6392 * internal++ and
6393 * alternate_accounting++, so
6394 * net effect on phys_footprint
6395 * is 0. That means: don't
6396 * touch phys_footprint here.
6397 */
6398 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
6399 } else {
6400 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6401 }
6402 }
6403 }
6404
6405 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
6406 if (pmap->stats.resident_count > pmap->stats.resident_max) {
6407 pmap->stats.resident_max = pmap->stats.resident_count;
6408 }
6409 } else {
6410 if (prot & VM_PROT_EXECUTE) {
6411 kr = KERN_FAILURE;
6412 goto Pmap_enter_cleanup;
6413 }
6414
6415 wimg_bits = pmap_cache_attributes(pn);
6416 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6417 wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6418 }
6419
6420 pte |= wimg_to_pte(wimg_bits);
6421
6422 pmap_enter_pte(pmap, pte_p, pte, v);
6423 }
6424
6425 goto Pmap_enter_return;
6426
6427 Pmap_enter_cleanup:
6428
6429 if (refcnt != NULL) {
6430 assert(refcnt_updated);
6431 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
6432 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6433 }
6434 }
6435
6436 Pmap_enter_return:
6437
6438 #if CONFIG_PGTRACE
6439 if (pgtrace_enabled) {
6440 // Clone and invalidate original mapping if eligible
6441 for (int i = 0; i < PAGE_RATIO; i++) {
6442 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
6443 }
6444 }
6445 #endif
6446
6447 if (pve_p != PV_ENTRY_NULL) {
6448 pv_free(pve_p);
6449 }
6450
6451 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
6452 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6453 }
6454
6455 PMAP_UNLOCK(pmap);
6456
6457 return kr;
6458 }
6459
6460 kern_return_t
6461 pmap_enter_options(
6462 pmap_t pmap,
6463 vm_map_address_t v,
6464 ppnum_t pn,
6465 vm_prot_t prot,
6466 vm_prot_t fault_type,
6467 unsigned int flags,
6468 boolean_t wired,
6469 unsigned int options,
6470 __unused void *arg)
6471 {
6472 kern_return_t kr = KERN_FAILURE;
6473
6474 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
6475 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
6476
6477 kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
6478 pv_water_mark_check();
6479
6480 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
6481
6482 return kr;
6483 }
6484
6485 /*
6486 * Routine: pmap_change_wiring
6487 * Function: Change the wiring attribute for a map/virtual-address
6488 * pair.
6489 * In/out conditions:
6490 * The mapping must already exist in the pmap.
6491 */
6492 MARK_AS_PMAP_TEXT static void
6493 pmap_change_wiring_internal(
6494 pmap_t pmap,
6495 vm_map_address_t v,
6496 boolean_t wired)
6497 {
6498 pt_entry_t *pte_p;
6499 pmap_paddr_t pa;
6500
6501 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6502 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6503 * to be wired in nearly all cases. For VM layer functionality, the wired
6504 * count in vm_page_t is sufficient. */
6505 if (pmap == kernel_pmap) {
6506 return;
6507 }
6508 VALIDATE_USER_PMAP(pmap);
6509
6510 PMAP_LOCK(pmap);
6511 pte_p = pmap_pte(pmap, v);
6512 assert(pte_p != PT_ENTRY_NULL);
6513 pa = pte_to_pa(*pte_p);
6514 if (pa_valid(pa)) {
6515 LOCK_PVH((int)pa_index(pa));
6516 }
6517
6518 if (wired && !pte_is_wired(*pte_p)) {
6519 pte_set_wired(pte_p, wired);
6520 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
6521 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6522 } else if (!wired && pte_is_wired(*pte_p)) {
6523 PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
6524 pte_set_wired(pte_p, wired);
6525 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
6526 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6527 }
6528
6529 if (pa_valid(pa)) {
6530 UNLOCK_PVH((int)pa_index(pa));
6531 }
6532
6533 PMAP_UNLOCK(pmap);
6534 }
6535
6536 void
6537 pmap_change_wiring(
6538 pmap_t pmap,
6539 vm_map_address_t v,
6540 boolean_t wired)
6541 {
6542 pmap_change_wiring_internal(pmap, v, wired);
6543 }
6544
6545 MARK_AS_PMAP_TEXT static ppnum_t
6546 pmap_find_phys_internal(
6547 pmap_t pmap,
6548 addr64_t va)
6549 {
6550 ppnum_t ppn = 0;
6551
6552 VALIDATE_PMAP(pmap);
6553
6554 if (pmap != kernel_pmap) {
6555 PMAP_LOCK(pmap);
6556 }
6557
6558 ppn = pmap_vtophys(pmap, va);
6559
6560 if (pmap != kernel_pmap) {
6561 PMAP_UNLOCK(pmap);
6562 }
6563
6564 return ppn;
6565 }
6566
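/*
 * Return the physical page number backing 'va' in 'pmap', or 0 if 'va' is
 * not mapped.  The hardware address-translation fast paths (mmu_kvtop /
 * mmu_uvtop) are tried first; otherwise the page tables are walked in
 * software.
 */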
6567 ppnum_t
6568 pmap_find_phys(
6569 pmap_t pmap,
6570 addr64_t va)
6571 {
6572 pmap_paddr_t pa = 0;
6573
6574 if (pmap == kernel_pmap) {
6575 pa = mmu_kvtop(va);
6576 } else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
6577 pa = mmu_uvtop(va);
6578 }
6579
6580 if (pa) {
6581 return (ppnum_t)(pa >> PAGE_SHIFT);
6582 }
6583
6584 if (not_in_kdp) {
6585 return pmap_find_phys_internal(pmap, va);
6586 } else {
6587 return pmap_vtophys(pmap, va);
6588 }
6589 }
6590
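/*
 * Translate a kernel virtual address to its physical address, first via the
 * hardware fast path (mmu_kvtop) and, failing that, by walking the kernel
 * page tables.
 */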
6591 pmap_paddr_t
6592 kvtophys(
6593 vm_offset_t va)
6594 {
6595 pmap_paddr_t pa;
6596
6597 pa = mmu_kvtop(va);
6598 if (pa) {
6599 return pa;
6600 }
6601 pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
6602 if (pa) {
6603 pa |= (va & PAGE_MASK);
6604 }
6605
6606 return (pmap_paddr_t)pa;
6607 }
6608
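/*
 * Software page-table walk: return the physical page number mapped at 'va'
 * in 'pmap', or 0 if no valid translation exists.  Handles both table and
 * block/section mappings.
 */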
6609 ppnum_t
6610 pmap_vtophys(
6611 pmap_t pmap,
6612 addr64_t va)
6613 {
6614 if ((va < pmap->min) || (va >= pmap->max)) {
6615 return 0;
6616 }
6617
6618 #if (__ARM_VMSA__ == 7)
6619 tt_entry_t *tte_p, tte;
6620 pt_entry_t *pte_p;
6621 ppnum_t ppn;
6622
6623 tte_p = pmap_tte(pmap, va);
6624 if (tte_p == (tt_entry_t *) NULL) {
6625 return (ppnum_t) 0;
6626 }
6627
6628 tte = *tte_p;
6629 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6630 pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
6631 ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
6632 #if DEVELOPMENT || DEBUG
6633 if (ppn != 0 &&
6634 ARM_PTE_IS_COMPRESSED(*pte_p)) {
6635 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
6636 pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
6637 }
6638 #endif /* DEVELOPMENT || DEBUG */
6639 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6640 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
6641 ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
6642 } else {
6643 ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
6644 }
6645 } else {
6646 ppn = 0;
6647 }
6648 #else
6649 tt_entry_t *ttp;
6650 tt_entry_t tte;
6651 ppnum_t ppn = 0;
6652
6653 /* Level 0 currently unused */
6654
6655 #if __ARM64_TWO_LEVEL_PMAP__
6656 /* We have no L1 entry; go straight to the L2 entry */
6657 ttp = pmap_tt2e(pmap, va);
6658 tte = *ttp;
6659 #else
6660 /* Get first-level (1GB) entry */
6661 ttp = pmap_tt1e(pmap, va);
6662 tte = *ttp;
6663 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
6664 return ppn;
6665 }
6666
6667 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, va)];
6668 #endif
6669 if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
6670 return ppn;
6671 }
6672
6673 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6674 ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
6675 return ppn;
6676 }
6677 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, va)];
6678 ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
6679 #endif
6680
6681 return ppn;
6682 }
6683
6684 MARK_AS_PMAP_TEXT static vm_offset_t
6685 pmap_extract_internal(
6686 pmap_t pmap,
6687 vm_map_address_t va)
6688 {
6689 pmap_paddr_t pa = 0;
6690 ppnum_t ppn = 0;
6691
6692 if (pmap == NULL) {
6693 return 0;
6694 }
6695
6696 VALIDATE_PMAP(pmap);
6697
6698 PMAP_LOCK(pmap);
6699
6700 ppn = pmap_vtophys(pmap, va);
6701
6702 if (ppn != 0) {
6703 pa = ptoa(ppn) | ((va) & PAGE_MASK);
6704 }
6705
6706 PMAP_UNLOCK(pmap);
6707
6708 return pa;
6709 }
6710
6711 /*
6712 * Routine: pmap_extract
6713 * Function:
6714 * Extract the physical page address associated
6715 * with the given map/virtual_address pair.
6716 *
6717 */
6718 vm_offset_t
6719 pmap_extract(
6720 pmap_t pmap,
6721 vm_map_address_t va)
6722 {
6723 pmap_paddr_t pa = 0;
6724
6725 if (pmap == kernel_pmap) {
6726 pa = mmu_kvtop(va);
6727 } else if (pmap == vm_map_pmap(current_thread()->map)) {
6728 pa = mmu_uvtop(va);
6729 }
6730
6731 if (pa) {
6732 return pa;
6733 }
6734
6735 return pmap_extract_internal(pmap, va);
6736 }
6737
6738 /*
6739 * pmap_init_pte_page - Initialize a page table page.
6740 */
6741 void
6742 pmap_init_pte_page(
6743 pmap_t pmap,
6744 pt_entry_t *pte_p,
6745 vm_offset_t va,
6746 unsigned int ttlevel,
6747 boolean_t alloc_ptd)
6748 {
6749 pt_desc_t *ptdp = NULL;
6750 vm_offset_t *pvh;
6751
6752 pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));
6753
6754 if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
6755 if (alloc_ptd) {
6756 /*
6757 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
6758 * on 4KB hardware, we may already have allocated a page table descriptor for a
6759 * bootstrap request, so we check for an existing PTD here.
6760 */
6761 ptdp = ptd_alloc(pmap, true);
6762 pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
6763 } else {
6764 panic("pmap_init_pte_page(): pte_p %p", pte_p);
6765 }
6766 } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
6767 ptdp = (pt_desc_t*)(pvh_list(pvh));
6768 } else {
6769 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
6770 }
6771
6772 bzero(pte_p, ARM_PGBYTES);
6773 // The barrier below ensures the page zeroing is visible to the PTW before
6774 // the table is linked into the PTE of the previous level.
6775 __builtin_arm_dmb(DMB_ISHST);
6776 ptd_init(ptdp, pmap, va, ttlevel, pte_p);
6777 }
6778
6779 /*
6780 * Routine: pmap_expand
6781 *
6782 * Expands a pmap to be able to map the specified virtual address.
6783 *
6784 * Allocates new memory for the default (COARSE) translation table
6785 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
6786 * also allocates space for the corresponding pv entries.
6787 *
6788 * Nothing should be locked.
6789 */
6790 static kern_return_t
6791 pmap_expand(
6792 pmap_t pmap,
6793 vm_map_address_t v,
6794 unsigned int options,
6795 unsigned int level)
6796 {
6797 #if (__ARM_VMSA__ == 7)
6798 vm_offset_t pa;
6799 tt_entry_t *tte_p;
6800 tt_entry_t *tt_p;
6801 unsigned int i;
6802
6803
6804 while (tte_index(pmap, v) >= pmap->tte_index_max) {
6805 tte_p = pmap_tt1_allocate(pmap, 2 * ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
6806 if (tte_p == (tt_entry_t *)0) {
6807 return KERN_RESOURCE_SHORTAGE;
6808 }
6809
6810 PMAP_LOCK(pmap);
6811 if (pmap->tte_index_max > NTTES) {
6812 pmap_tt1_deallocate(pmap, tte_p, 2 * ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
6813 PMAP_UNLOCK(pmap);
6814 break;
6815 }
6816
6817 pmap_simple_lock(&pmap->tt1_lock);
6818 for (i = 0; i < pmap->tte_index_max; i++) {
6819 tte_p[i] = pmap->tte[i];
6820 }
6821 for (i = NTTES; i < 2 * NTTES; i++) {
6822 tte_p[i] = ARM_TTE_TYPE_FAULT;
6823 }
6824
6825 pmap->prev_tte = pmap->tte;
6826 pmap->tte = tte_p;
6827 pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);
6828
6829 FLUSH_PTE_RANGE(pmap->tte, pmap->tte + (2 * NTTES));
6830
6831 pmap->tte_index_max = 2 * NTTES;
6832 pmap->stamp = hw_atomic_add(&pmap_stamp, 1);
6833
6834 for (i = 0; i < NTTES; i++) {
6835 pmap->prev_tte[i] = ARM_TTE_TYPE_FAULT;
6836 }
6837
6838 FLUSH_PTE_RANGE(pmap->prev_tte, pmap->prev_tte + NTTES);
6839
6840 pmap_simple_unlock(&pmap->tt1_lock);
6841 PMAP_UNLOCK(pmap);
6842 pmap_set_pmap(pmap, current_thread());
6843 }
6844
6845 if (level == 1) {
6846 return KERN_SUCCESS;
6847 }
6848
6849 {
6850 tt_entry_t *tte_next_p;
6851
6852 PMAP_LOCK(pmap);
6853 pa = 0;
6854 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
6855 PMAP_UNLOCK(pmap);
6856 return KERN_SUCCESS;
6857 }
6858 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
6859 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
6860 if (tte_to_pa(*tte_next_p)) {
6861 pa = tte_to_pa(*tte_next_p);
6862 break;
6863 }
6864 tte_next_p++;
6865 }
6866 pa = pa & ~PAGE_MASK;
6867 if (pa) {
6868 tte_p = &pmap->tte[ttenum(v)];
6869 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
6870 FLUSH_PTE(tte_p);
6871 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
6872 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
6873 PMAP_UNLOCK(pmap);
6874 return KERN_SUCCESS;
6875 }
6876 PMAP_UNLOCK(pmap);
6877 }
6878 v = v & ~ARM_TT_L1_PT_OFFMASK;
6879
6880
6881 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6882 /*
6883 * Allocate a VM page for the level 2 page table entries.
6884 */
6885 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6886 if (options & PMAP_OPTIONS_NOWAIT) {
6887 return KERN_RESOURCE_SHORTAGE;
6888 }
6889 VM_PAGE_WAIT();
6890 }
6891
6892 PMAP_LOCK(pmap);
6893 /*
6894 * See if someone else expanded us first
6895 */
6896 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6897 tt_entry_t *tte_next_p;
6898
6899 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
6900 pa = kvtophys((vm_offset_t)tt_p);
6901 #ifndef __ARM_L1_PTW__
6902 CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
6903 #endif
6904 tte_p = &pmap->tte[ttenum(v)];
6905 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
6906 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
6907 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
6908 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
6909 tte_next_p++;
6910 pa = pa + 0x400;
6911 }
6912 FLUSH_PTE_RANGE(tte_p, tte_p + 4);
6913
6914 pa = 0x0ULL;
6915 tt_p = (tt_entry_t *)NULL;
6916 }
6917 PMAP_UNLOCK(pmap);
6918 if (tt_p != (tt_entry_t *)NULL) {
6919 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
6920 tt_p = (tt_entry_t *)NULL;
6921 }
6922 }
6923 return KERN_SUCCESS;
6924 #else
6925 pmap_paddr_t pa;
6926 #if __ARM64_TWO_LEVEL_PMAP__
6927 /* If we are using a two level page table, we'll start at L2. */
6928 unsigned int ttlevel = 2;
6929 #else
6930 /* Otherwise, we start at L1 (we use 3 levels by default). */
6931 unsigned int ttlevel = 1;
6932 #endif
6933 tt_entry_t *tte_p;
6934 tt_entry_t *tt_p;
6935
6936 pa = 0x0ULL;
6937 tt_p = (tt_entry_t *)NULL;
6938
6939 for (; ttlevel < level; ttlevel++) {
6940 PMAP_LOCK(pmap);
6941
6942 if (ttlevel == 1) {
6943 if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
6944 PMAP_UNLOCK(pmap);
6945 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6946 if (options & PMAP_OPTIONS_NOWAIT) {
6947 return KERN_RESOURCE_SHORTAGE;
6948 }
6949 VM_PAGE_WAIT();
6950 }
6951 PMAP_LOCK(pmap);
6952 if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
6953 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
6954 pa = kvtophys((vm_offset_t)tt_p);
6955 tte_p = pmap_tt1e( pmap, v);
6956 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
6957 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
6958 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
6959 pa = 0x0ULL;
6960 tt_p = (tt_entry_t *)NULL;
6961 if ((pmap == kernel_pmap) && (VM_MIN_KERNEL_ADDRESS < 0x00000000FFFFFFFFULL)) {
6962 current_pmap()->tte[v >> ARM_TT_L1_SHIFT] = kernel_pmap->tte[v >> ARM_TT_L1_SHIFT];
6963 }
6964 }
6965 }
6966 } else if (ttlevel == 2) {
6967 if (pmap_tt3e(pmap, v) == PT_ENTRY_NULL) {
6968 PMAP_UNLOCK(pmap);
6969 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L3_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6970 if (options & PMAP_OPTIONS_NOWAIT) {
6971 return KERN_RESOURCE_SHORTAGE;
6972 }
6973 VM_PAGE_WAIT();
6974 }
6975 PMAP_LOCK(pmap);
6976 if ((pmap_tt3e(pmap, v) == PT_ENTRY_NULL)) {
6977 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L3_LEVEL, FALSE);
6978 pa = kvtophys((vm_offset_t)tt_p);
6979 tte_p = pmap_tt2e( pmap, v);
6980 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
6981 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L2_OFFMASK),
6982 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L2_OFFMASK) + ARM_TT_L2_SIZE), *tte_p);
6983 pa = 0x0ULL;
6984 tt_p = (tt_entry_t *)NULL;
6985 }
6986 }
6987 }
6988
6989 PMAP_UNLOCK(pmap);
6990
6991 if (tt_p != (tt_entry_t *)NULL) {
6992 pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
6993 tt_p = (tt_entry_t *)NULL;
6994 }
6995 }
6996
6997 return KERN_SUCCESS;
6998 #endif
6999 }
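
#if 0
/*
 * Illustrative sketch (editor's example, not part of the original source):
 * this mirrors how pmap_enter_options_internal() uses pmap_expand() --
 * retry the PTE lookup until the leaf table covering 'v' exists (locking
 * omitted for brevity).
 */
static kern_return_t
pmap_expand_example(pmap_t pmap, vm_map_address_t v, unsigned int options)
{
	kern_return_t kr;

	while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
		kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}
	return KERN_SUCCESS;
}
#endif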
7000
7001 /*
7002 * Routine: pmap_collect
7003 * Function:
7004 * Garbage collects the physical map system for
7005 * pages which are no longer used.
7006 * Success need not be guaranteed -- that is, there
7007 * may well be pages which are not referenced, but
7008 * others may be collected.
7009 */
7010 void
7011 pmap_collect(pmap_t pmap)
7012 {
7013 if (pmap == PMAP_NULL) {
7014 return;
7015 }
7016
7017 #if 0
7018 PMAP_LOCK(pmap);
7019 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
7020 /* TODO: Scan for vm page assigned to top level page tables with no reference */
7021 }
7022 PMAP_UNLOCK(pmap);
7023 #endif
7024
7025 return;
7026 }
7027
7028 /*
7029 * Routine: pmap_gc
7030 * Function:
7031 * Pmap garbage collection
7032 * Called by the pageout daemon when pages are scarce.
7033 *
7034 */
7035 void
7036 pmap_gc(
7037 void)
7038 {
7039 pmap_t pmap, pmap_next;
7040 boolean_t gc_wait;
7041
7042 if (pmap_gc_allowed &&
7043 (pmap_gc_allowed_by_time_throttle ||
7044 pmap_gc_forced)) {
7045 pmap_gc_forced = FALSE;
7046 pmap_gc_allowed_by_time_throttle = FALSE;
7047 pmap_simple_lock(&pmaps_lock);
7048 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
7049 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
7050 if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
7051 pmap->gc_status |= PMAP_GC_INFLIGHT;
7052 }
7053 pmap_simple_unlock(&pmaps_lock);
7054
7055 pmap_collect(pmap);
7056
7057 pmap_simple_lock(&pmaps_lock);
7058 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
7059 pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
7060 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
7061 if (gc_wait) {
7062 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
7063 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
7064 }
7065 pmap_simple_unlock(&pmaps_lock);
7066 thread_wakeup((event_t) &pmap->gc_status);
7067 pmap_simple_lock(&pmaps_lock);
7068 }
7069 pmap = pmap_next;
7070 }
7071 pmap_simple_unlock(&pmaps_lock);
7072 }
7073 }
7074
7075 /*
7076 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
7077 */
7078 uint64_t
7079 pmap_release_pages_fast(void)
7080 {
7081 return 0;
7082 }
7083
7084 /*
7085 * By default, don't attempt pmap GC more frequently
7086 * than once per minute.
7087 */
7088
7089 void
7090 compute_pmap_gc_throttle(
7091 void *arg __unused)
7092 {
7093 pmap_gc_allowed_by_time_throttle = TRUE;
7094 }
7095
7096 /*
7097 * pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, ...)
7098 *
7099 * Invalidates all of the instruction cache on a physical page and
7100 * pushes any dirty data from the data cache for the same physical page.
7101 */
7102
7103 kern_return_t
7104 pmap_attribute_cache_sync(
7105 ppnum_t pp,
7106 vm_size_t size,
7107 __unused vm_machine_attribute_t attribute,
7108 __unused vm_machine_attribute_val_t * value)
7109 {
7110 if (size > PAGE_SIZE) {
7111 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
7112 } else {
7113 cache_sync_page(pp);
7114 }
7115
7116 return KERN_SUCCESS;
7117 }
7118
7119 /*
7120 * pmap_sync_page_data_phys(ppnum_t pp)
7121 *
7122 * Invalidates all of the instruction cache on a physical page and
7123 * pushes any dirty data from the data cache for the same physical page
7124 */
7125 void
7126 pmap_sync_page_data_phys(
7127 ppnum_t pp)
7128 {
7129 cache_sync_page(pp);
7130 }
7131
7132 /*
7133 * pmap_sync_page_attributes_phys(ppnum_t pp)
7134 *
7135 * Write back and invalidate all cachelines on a physical page.
7136 */
7137 void
7138 pmap_sync_page_attributes_phys(
7139 ppnum_t pp)
7140 {
7141 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
7142 }
7143
7144 #if CONFIG_COREDUMP
7145 /* temporary workaround */
7146 boolean_t
7147 coredumpok(
7148 vm_map_t map,
7149 vm_offset_t va)
7150 {
7151 pt_entry_t *pte_p;
7152 pt_entry_t spte;
7153
7154 pte_p = pmap_pte(map->pmap, va);
7155 if (0 == pte_p) {
7156 return FALSE;
7157 }
7158 spte = *pte_p;
7159 return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
7160 }
7161 #endif
7162
7163 void
7164 fillPage(
7165 ppnum_t pn,
7166 unsigned int fill)
7167 {
7168 unsigned int *addr;
7169 int count;
7170
7171 addr = (unsigned int *) phystokv(ptoa(pn));
7172 count = PAGE_SIZE / sizeof(unsigned int);
7173 while (count--) {
7174 *addr++ = fill;
7175 }
7176 }
7177
7178 extern void mapping_set_mod(ppnum_t pn);
7179
7180 void
7181 mapping_set_mod(
7182 ppnum_t pn)
7183 {
7184 pmap_set_modify(pn);
7185 }
7186
7187 extern void mapping_set_ref(ppnum_t pn);
7188
7189 void
7190 mapping_set_ref(
7191 ppnum_t pn)
7192 {
7193 pmap_set_reference(pn);
7194 }
7195
7196 /*
7197 * Clear specified attribute bits.
7198 *
7199 * Try to force an arm_fast_fault() for all mappings of
7200 * the page - to force attributes to be set again at fault time.
7201 * If the forcing succeeds, clear the cached bits at the head.
7202 * Otherwise, something must have been wired, so leave the cached
7203 * attributes alone.
7204 */
7205 MARK_AS_PMAP_TEXT static void
7206 phys_attribute_clear_internal(
7207 ppnum_t pn,
7208 unsigned int bits,
7209 int options,
7210 void *arg)
7211 {
7212 pmap_paddr_t pa = ptoa(pn);
7213 vm_prot_t allow_mode = VM_PROT_ALL;
7214
7215
7216 if ((bits & PP_ATTR_MODIFIED) &&
7217 (options & PMAP_OPTIONS_NOFLUSH) &&
7218 (arg == NULL)) {
7219 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
7220 "should not clear 'modified' without flushing TLBs\n",
7221 pn, bits, options, arg);
7222 }
7223
7224 assert(pn != vm_page_fictitious_addr);
7225
7226 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
7227 assert(bits == PP_ATTR_MODIFIED);
7228
7229 pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
7230 /*
7231 * We short circuit this case; it should not need to
7232 * invoke arm_force_fast_fault, so just clear the modified bit.
7233 * pmap_page_protect has taken care of resetting
7234 * the state so that we'll see the next write as a fault to
7235 * the VM (i.e. we don't want a fast fault).
7236 */
7237 pa_clear_bits(pa, bits);
7238 return;
7239 }
7240 if (bits & PP_ATTR_REFERENCED) {
7241 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
7242 }
7243 if (bits & PP_ATTR_MODIFIED) {
7244 allow_mode &= ~VM_PROT_WRITE;
7245 }
7246
7247 if (bits == PP_ATTR_NOENCRYPT) {
7248 /*
7249 * We short circuit this case; it should not need to
7250 * invoke arm_force_fast_fault, so just clear and
7251 * return. On ARM, this bit is just a debugging aid.
7252 */
7253 pa_clear_bits(pa, bits);
7254 return;
7255 }
7256
7257 if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
7258 pa_clear_bits(pa, bits);
7259 }
7260 return;
7261 }
7262
7263 static void
7264 phys_attribute_clear(
7265 ppnum_t pn,
7266 unsigned int bits,
7267 int options,
7268 void *arg)
7269 {
7270 /*
7271 * Do we really want this tracepoint? It will be extremely chatty.
7272 * Also, should we have a corresponding trace point for the set path?
7273 */
7274 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
7275
7276 phys_attribute_clear_internal(pn, bits, options, arg);
7277
7278 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
7279 }
7280
7281 /*
7282 * Set specified attribute bits.
7283 *
7284 * Set cached value in the pv head because we have
7285 * no per-mapping hardware support for referenced and
7286 * modify bits.
7287 */
7288 MARK_AS_PMAP_TEXT static void
7289 phys_attribute_set_internal(
7290 ppnum_t pn,
7291 unsigned int bits)
7292 {
7293 pmap_paddr_t pa = ptoa(pn);
7294 assert(pn != vm_page_fictitious_addr);
7295
7296
7297 pa_set_bits(pa, bits);
7298
7299 return;
7300 }
7301
7302 static void
7303 phys_attribute_set(
7304 ppnum_t pn,
7305 unsigned int bits)
7306 {
7307 phys_attribute_set_internal(pn, bits);
7308 }
7309
7310
7311 /*
7312 * Check specified attribute bits.
7313 *
7314 * use the software cached bits (since no hw support).
7315 */
7316 static boolean_t
7317 phys_attribute_test(
7318 ppnum_t pn,
7319 unsigned int bits)
7320 {
7321 pmap_paddr_t pa = ptoa(pn);
7322 assert(pn != vm_page_fictitious_addr);
7323 return pa_test_bits(pa, bits);
7324 }
7325
7326
7327 /*
7328 * Set the modify/reference bits on the specified physical page.
7329 */
7330 void
7331 pmap_set_modify(ppnum_t pn)
7332 {
7333 phys_attribute_set(pn, PP_ATTR_MODIFIED);
7334 }
7335
7336
7337 /*
7338 * Clear the modify bits on the specified physical page.
7339 */
7340 void
7341 pmap_clear_modify(
7342 ppnum_t pn)
7343 {
7344 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
7345 }
7346
7347
7348 /*
7349 * pmap_is_modified:
7350 *
7351 * Return whether or not the specified physical page is modified
7352 * by any physical maps.
7353 */
7354 boolean_t
7355 pmap_is_modified(
7356 ppnum_t pn)
7357 {
7358 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
7359 }
7360
7361
7362 /*
7363 * Set the reference bit on the specified physical page.
7364 */
7365 static void
7366 pmap_set_reference(
7367 ppnum_t pn)
7368 {
7369 phys_attribute_set(pn, PP_ATTR_REFERENCED);
7370 }
7371
7372 /*
7373 * Clear the reference bits on the specified physical page.
7374 */
7375 void
7376 pmap_clear_reference(
7377 ppnum_t pn)
7378 {
7379 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
7380 }
7381
7382
7383 /*
7384 * pmap_is_referenced:
7385 *
7386 * Return whether or not the specified physical page is referenced
7387 * by any physical maps.
7388 */
7389 boolean_t
7390 pmap_is_referenced(
7391 ppnum_t pn)
7392 {
7393 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
7394 }
7395
7396 /*
7397 * pmap_get_refmod(phys)
7398 * returns the referenced and modified bits of the specified
7399 * physical page.
7400 */
7401 unsigned int
7402 pmap_get_refmod(
7403 ppnum_t pn)
7404 {
7405 return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
7406 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
7407 }
7408
7409 /*
7410 * pmap_clear_refmod(phys, mask)
7411 * clears the referenced and modified bits as specified by the mask
7412 * of the specified physical page.
7413 */
7414 void
7415 pmap_clear_refmod_options(
7416 ppnum_t pn,
7417 unsigned int mask,
7418 unsigned int options,
7419 void *arg)
7420 {
7421 unsigned int bits;
7422
7423 bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
7424 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
7425 phys_attribute_clear(pn, bits, options, arg);
7426 }
7427
7428 void
7429 pmap_clear_refmod(
7430 ppnum_t pn,
7431 unsigned int mask)
7432 {
7433 pmap_clear_refmod_options(pn, mask, 0, NULL);
7434 }
7435
7436 unsigned int
7437 pmap_disconnect_options(
7438 ppnum_t pn,
7439 unsigned int options,
7440 void *arg)
7441 {
7442 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
7443 /*
7444 * On ARM, the "modified" bit is managed by software, so
7445 * we know up-front if the physical page is "modified",
7446 * without having to scan all the PTEs pointing to it.
7447 * The caller should have made the VM page "busy" so no one
7448 * should be able to establish any new mapping and "modify"
7449 * the page behind us.
7450 */
7451 if (pmap_is_modified(pn)) {
7452 /*
7453 * The page has been modified and will be sent to
7454 * the VM compressor.
7455 */
7456 options |= PMAP_OPTIONS_COMPRESSOR;
7457 } else {
7458 /*
7459 * The page hasn't been modified and will be freed
7460 * instead of compressed.
7461 */
7462 }
7463 }
7464
7465 /* disconnect the page */
7466 pmap_page_protect_options(pn, 0, options, arg);
7467
7468 /* return ref/chg status */
7469 return pmap_get_refmod(pn);
7470 }
7471
7472 /*
7473 * Routine:
7474 * pmap_disconnect
7475 *
7476 * Function:
7477 * Disconnect all mappings for this page and return reference and change status
7478 * in generic format.
7479 *
7480 */
7481 unsigned int
7482 pmap_disconnect(
7483 ppnum_t pn)
7484 {
7485 pmap_page_protect(pn, 0); /* disconnect the page */
7486 return pmap_get_refmod(pn); /* return ref/chg status */
7487 }
7488
7489 boolean_t
7490 pmap_has_managed_page(ppnum_t first, ppnum_t last)
7491 {
7492 if (ptoa(first) >= vm_last_phys) {
7493 return FALSE;
7494 }
7495 if (ptoa(last) < vm_first_phys) {
7496 return FALSE;
7497 }
7498
7499 return TRUE;
7500 }
7501
7502 /*
7503 * The state maintained by the noencrypt functions is used as a
7504 * debugging aid on ARM. This incurs some overhead on the part
7505 * of the caller. A special case check in phys_attribute_clear
7506 * (the most expensive path) currently minimizes this overhead,
7507 * but stubbing these functions out on RELEASE kernels yields
7508 * further wins.
7509 */
7510 boolean_t
7511 pmap_is_noencrypt(
7512 ppnum_t pn)
7513 {
7514 #if DEVELOPMENT || DEBUG
7515 boolean_t result = FALSE;
7516
7517 if (!pa_valid(ptoa(pn))) {
7518 return FALSE;
7519 }
7520
7521 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
7522
7523 return result;
7524 #else
7525 #pragma unused(pn)
7526 return FALSE;
7527 #endif
7528 }
7529
7530 void
7531 pmap_set_noencrypt(
7532 ppnum_t pn)
7533 {
7534 #if DEVELOPMENT || DEBUG
7535 if (!pa_valid(ptoa(pn))) {
7536 return;
7537 }
7538
7539 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
7540 #else
7541 #pragma unused(pn)
7542 #endif
7543 }
7544
7545 void
7546 pmap_clear_noencrypt(
7547 ppnum_t pn)
7548 {
7549 #if DEVELOPMENT || DEBUG
7550 if (!pa_valid(ptoa(pn))) {
7551 return;
7552 }
7553
7554 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
7555 #else
7556 #pragma unused(pn)
7557 #endif
7558 }
7559
7560
7561 void
7562 pmap_lock_phys_page(ppnum_t pn)
7563 {
7564 int pai;
7565 pmap_paddr_t phys = ptoa(pn);
7566
7567 if (pa_valid(phys)) {
7568 pai = (int)pa_index(phys);
7569 LOCK_PVH(pai);
7570 } else
7571 { simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
7572 }
7573
7574
7575 void
7576 pmap_unlock_phys_page(ppnum_t pn)
7577 {
7578 int pai;
7579 pmap_paddr_t phys = ptoa(pn);
7580
7581 if (pa_valid(phys)) {
7582 pai = (int)pa_index(phys);
7583 UNLOCK_PVH(pai);
7584 } else
7585 { simple_unlock(&phys_backup_lock);}
7586 }
7587
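/*
 * Point this CPU's user translation table base at 'pmap'.  On ARMv7 this
 * also updates TTBCR.N for the pmap's TT1 size and frees a stale 1-page TT1
 * saved by pmap_expand() once no CPU still references it; on ARM64 it loads
 * TTBR0 with the pmap's table and ASID (or the invalid table for the
 * kernel pmap).
 */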
7588 MARK_AS_PMAP_TEXT static void
7589 pmap_switch_user_ttb_internal(
7590 pmap_t pmap)
7591 {
7592 VALIDATE_PMAP(pmap);
7593 pmap_cpu_data_t *cpu_data_ptr;
7594 cpu_data_ptr = pmap_get_cpu_data();
7595
7596 #if (__ARM_VMSA__ == 7)
7597
7598 if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL)
7599 && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) {
7600 unsigned int c;
7601
7602 c = hw_atomic_sub((volatile uint32_t *)&cpu_data_ptr->cpu_user_pmap->cpu_ref, 1);
7603 if ((c == 0) && (cpu_data_ptr->cpu_user_pmap->prev_tte != 0)) {
7604 /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
7605 * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/
7606 tt_entry_t *tt_entry;
7607
7608 tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte;
7609 cpu_data_ptr->cpu_user_pmap->prev_tte = (tt_entry_t *) NULL;
7610 pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
7611 }
7612 }
7613 cpu_data_ptr->cpu_user_pmap = pmap;
7614 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
7615 (void) hw_atomic_add((volatile uint32_t *)&pmap->cpu_ref, 1);
7616
7617 #if MACH_ASSERT && __ARM_USER_PROTECT__
7618 {
7619 unsigned int ttbr0_val, ttbr1_val;
7620 __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
7621 __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
7622 if (ttbr0_val != ttbr1_val) {
7623 panic("Misaligned ttbr0 %08X\n", ttbr0_val);
7624 }
7625 }
7626 #endif
7627 if (pmap->tte_index_max == NTTES) {
7628 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
7629 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
7630 __builtin_arm_isb(ISB_SY);
7631 #if !__ARM_USER_PROTECT__
7632 set_mmu_ttb(pmap->ttep);
7633 #endif
7634 } else {
7635 #if !__ARM_USER_PROTECT__
7636 set_mmu_ttb(pmap->ttep);
7637 #endif
7638 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
7639 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
7640 __builtin_arm_isb(ISB_SY);
7641 #if MACH_ASSERT && __ARM_USER_PROTECT__
7642 if (pmap->ttep & 0x1000) {
7643 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
7644 }
7645 #endif
7646 }
7647
7648 #if !__ARM_USER_PROTECT__
7649 set_context_id(pmap->asid);
7650 #endif
7651
7652 #else /* (__ARM_VMSA__ == 7) */
7653
7654 if (pmap != kernel_pmap) {
7655 cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
7656 }
7657
7658 if (pmap == kernel_pmap) {
7659 pmap_clear_user_ttb_internal();
7660 } else {
7661 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->asid) << TTBR_ASID_SHIFT));
7662 }
7663 #endif
7664 }
7665
7666 void
7667 pmap_switch_user_ttb(
7668 pmap_t pmap)
7669 {
7670 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
7671 pmap_switch_user_ttb_internal(pmap);
7672 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
7673 }
7674
7675 MARK_AS_PMAP_TEXT static void
7676 pmap_clear_user_ttb_internal(void)
7677 {
7678 #if (__ARM_VMSA__ > 7)
7679 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
7680 #else
7681 set_mmu_ttb(kernel_pmap->ttep);
7682 #endif
7683 }
7684
7685 void
7686 pmap_clear_user_ttb(void)
7687 {
7688 pmap_clear_user_ttb_internal();
7689 }
7690
7691 /*
7692 * Routine: arm_force_fast_fault
7693 *
7694 * Function:
7695 * Force all mappings for this page to fault according
7696 * to the access modes allowed, so we can gather ref/modify
7697 * bits again.
7698 */
7699 MARK_AS_PMAP_TEXT static boolean_t
7700 arm_force_fast_fault_internal(
7701 ppnum_t ppnum,
7702 vm_prot_t allow_mode,
7703 int options)
7704 {
7705 pmap_paddr_t phys = ptoa(ppnum);
7706 pv_entry_t *pve_p;
7707 pt_entry_t *pte_p;
7708 int pai;
7709 boolean_t result;
7710 pv_entry_t **pv_h;
7711 boolean_t is_reusable, is_internal;
7712 boolean_t tlb_flush_needed = FALSE;
7713 boolean_t ref_fault;
7714 boolean_t mod_fault;
7715
7716 assert(ppnum != vm_page_fictitious_addr);
7717
7718 if (!pa_valid(phys)) {
7719 return FALSE; /* Not a managed page. */
7720 }
7721
7722 result = TRUE;
7723 ref_fault = FALSE;
7724 mod_fault = FALSE;
7725 pai = (int)pa_index(phys);
7726 LOCK_PVH(pai);
7727 pv_h = pai_to_pvh(pai);
7728
7729 pte_p = PT_ENTRY_NULL;
7730 pve_p = PV_ENTRY_NULL;
7731 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7732 pte_p = pvh_ptep(pv_h);
7733 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7734 pve_p = pvh_list(pv_h);
7735 }
7736
7737 is_reusable = IS_REUSABLE_PAGE(pai);
7738 is_internal = IS_INTERNAL_PAGE(pai);
7739
7740 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7741 vm_map_address_t va;
7742 pt_entry_t spte;
7743 pt_entry_t tmplate;
7744 pmap_t pmap;
7745 boolean_t update_pte;
7746
7747 if (pve_p != PV_ENTRY_NULL) {
7748 pte_p = pve_get_ptep(pve_p);
7749 }
7750
7751 if (pte_p == PT_ENTRY_NULL) {
7752 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
7753 }
7754 #ifdef PVH_FLAG_IOMMU
7755 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
7756 goto fff_skip_pve;
7757 }
7758 #endif
7759 if (*pte_p == ARM_PTE_EMPTY) {
7760 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7761 }
7762 if (ARM_PTE_IS_COMPRESSED(*pte_p)) {
7763 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7764 }
7765
7766 pmap = ptep_get_pmap(pte_p);
7767 va = ptep_get_va(pte_p);
7768
7769 assert(va >= pmap->min && va < pmap->max);
7770
7771 if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
7772 result = FALSE;
7773 break;
7774 }
7775
7776 spte = *pte_p;
7777 tmplate = spte;
7778 update_pte = FALSE;
7779
7780 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
7781 /* read protection sets the pte to fault */
7782 tmplate = tmplate & ~ARM_PTE_AF;
7783 update_pte = TRUE;
7784 ref_fault = TRUE;
7785 }
7786 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
7787 /* take away write permission if set */
7788 if (pmap == kernel_pmap) {
7789 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
7790 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7791 pte_set_was_writeable(tmplate, true);
7792 update_pte = TRUE;
7793 mod_fault = TRUE;
7794 }
7795 } else {
7796 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
7797 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
7798 pte_set_was_writeable(tmplate, true);
7799 update_pte = TRUE;
7800 mod_fault = TRUE;
7801 }
7802 }
7803 }
7804
7805
7806 if (update_pte) {
7807 if (*pte_p != ARM_PTE_TYPE_FAULT &&
7808 !ARM_PTE_IS_COMPRESSED(*pte_p)) {
7809 WRITE_PTE_STRONG(pte_p, tmplate);
7810 flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
7811 tlb_flush_needed = TRUE;
7812 } else {
7813 WRITE_PTE(pte_p, tmplate);
7814 __builtin_arm_isb(ISB_SY);
7815 }
7816 }
7817
7818 /* update pmap stats and ledgers */
7819 if (IS_ALTACCT_PAGE(pai, pve_p)) {
7820 /*
7821 * We do not track "reusable" status for
7822 * "alternate accounting" mappings.
7823 */
7824 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
7825 is_reusable &&
7826 is_internal &&
7827 pmap != kernel_pmap) {
7828 /* one less "reusable" */
7829 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
7830 OSAddAtomic(-1, &pmap->stats.reusable);
7831 /* one more "internal" */
7832 OSAddAtomic(+1, &pmap->stats.internal);
7833 PMAP_STATS_PEAK(pmap->stats.internal);
7834 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
7835 pmap_ledger_credit(pmap, task_ledgers.internal, machine_ptob(1));
7836 assert(!IS_ALTACCT_PAGE(pai, pve_p));
7837 assert(IS_INTERNAL_PAGE(pai));
7838 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
7839
7840 /*
7841 * Avoid the cost of another trap to handle the fast
7842 * fault when we next write to this page: let's just
7843 * handle that now since we already have all the
7844 * necessary information.
7845 */
7846 {
7847 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
7848 }
7849 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
7850 !is_reusable &&
7851 is_internal &&
7852 pmap != kernel_pmap) {
7853 /* one more "reusable" */
7854 OSAddAtomic(+1, &pmap->stats.reusable);
7855 PMAP_STATS_PEAK(pmap->stats.reusable);
7856 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
7857 /* one less "internal" */
7858 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
7859 OSAddAtomic(-1, &pmap->stats.internal);
7860 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(1));
7861 assert(!IS_ALTACCT_PAGE(pai, pve_p));
7862 assert(IS_INTERNAL_PAGE(pai));
7863 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
7864 }
7865
7866 #ifdef PVH_FLAG_IOMMU
7867 fff_skip_pve:
7868 #endif
7869 pte_p = PT_ENTRY_NULL;
7870 if (pve_p != PV_ENTRY_NULL) {
7871 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7872 }
7873 }
7874
7875 if (tlb_flush_needed) {
7876 sync_tlb_flush();
7877 }
7878
7879 /* update global "reusable" status for this page */
7880 if (is_internal) {
7881 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
7882 is_reusable) {
7883 CLR_REUSABLE_PAGE(pai);
7884 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
7885 !is_reusable) {
7886 SET_REUSABLE_PAGE(pai);
7887 }
7888 }
7889
7890 if (mod_fault) {
7891 SET_MODFAULT_PAGE(pai);
7892 }
7893 if (ref_fault) {
7894 SET_REFFAULT_PAGE(pai);
7895 }
7896
7897 UNLOCK_PVH(pai);
7898 return result;
7899 }
7900
7901 boolean_t
7902 arm_force_fast_fault(
7903 ppnum_t ppnum,
7904 vm_prot_t allow_mode,
7905 int options,
7906 __unused void *arg)
7907 {
7908 pmap_paddr_t phys = ptoa(ppnum);
7909
7910 assert(ppnum != vm_page_fictitious_addr);
7911
7912 if (!pa_valid(phys)) {
7913 return FALSE; /* Not a managed page. */
7914 }
7915
7916 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
7917 }
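/*
 * Illustrative sketch, not part of the original source: how a caller might
 * use arm_force_fast_fault() to (re)arm software modify-bit tracking for a
 * managed page.  The "example_" name is hypothetical.
 */
static boolean_t __unused
example_track_modifications(ppnum_t pn)
{
	/*
	 * Allow reads but revoke write permission, so the next store through
	 * any mapping of this page takes a fast fault that records
	 * PP_ATTR_MODIFIED.
	 */
	return arm_force_fast_fault(pn, VM_PROT_READ, 0, NULL);
}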
7918
7919 /*
7920 * Routine: arm_clear_fast_fault
7921 *
7922 * Function:
7923 * Clear pending force fault for all mappings for this page based on
7924 * the observed fault type, update ref/modify bits.
7925 */
7926 boolean_t
7927 arm_clear_fast_fault(
7928 ppnum_t ppnum,
7929 vm_prot_t fault_type)
7930 {
7931 pmap_paddr_t pa = ptoa(ppnum);
7932 pv_entry_t *pve_p;
7933 pt_entry_t *pte_p;
7934 int pai;
7935 boolean_t result;
7936 boolean_t tlb_flush_needed = FALSE;
7937 pv_entry_t **pv_h;
7938
7939 assert(ppnum != vm_page_fictitious_addr);
7940
7941 if (!pa_valid(pa)) {
7942 return FALSE; /* Not a managed page. */
7943 }
7944
7945 result = FALSE;
7946 pai = (int)pa_index(pa);
7947 ASSERT_PVH_LOCKED(pai);
7948 pv_h = pai_to_pvh(pai);
7949
7950 pte_p = PT_ENTRY_NULL;
7951 pve_p = PV_ENTRY_NULL;
7952 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7953 pte_p = pvh_ptep(pv_h);
7954 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7955 pve_p = pvh_list(pv_h);
7956 }
7957
7958 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7959 vm_map_address_t va;
7960 pt_entry_t spte;
7961 pt_entry_t tmplate;
7962 pmap_t pmap;
7963
7964 if (pve_p != PV_ENTRY_NULL) {
7965 pte_p = pve_get_ptep(pve_p);
7966 }
7967
7968 if (pte_p == PT_ENTRY_NULL) {
7969 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
7970 }
7971 #ifdef PVH_FLAG_IOMMU
7972 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
7973 goto cff_skip_pve;
7974 }
7975 #endif
7976 if (*pte_p == ARM_PTE_EMPTY) {
7977 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7978 }
7979
7980 pmap = ptep_get_pmap(pte_p);
7981 va = ptep_get_va(pte_p);
7982
7983 assert(va >= pmap->min && va < pmap->max);
7984
7985 spte = *pte_p;
7986 tmplate = spte;
7987
7988 if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
7989 {
7990 if (pmap == kernel_pmap) {
7991 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
7992 } else {
7993 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWRW));
7994 }
7995 }
7996
7997 tmplate |= ARM_PTE_AF;
7998
7999 pte_set_was_writeable(tmplate, false);
8000 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
8001 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
8002 tmplate = spte | ARM_PTE_AF;
8003
8004 {
8005 pa_set_bits(pa, PP_ATTR_REFERENCED);
8006 }
8007 }
8008
8009
8010 if (spte != tmplate) {
8011 if (spte != ARM_PTE_TYPE_FAULT) {
8012 WRITE_PTE_STRONG(pte_p, tmplate);
8013 flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
8014 tlb_flush_needed = TRUE;
8015 } else {
8016 WRITE_PTE(pte_p, tmplate);
8017 __builtin_arm_isb(ISB_SY);
8018 }
8019 result = TRUE;
8020 }
8021
8022 #ifdef PVH_FLAG_IOMMU
8023 cff_skip_pve:
8024 #endif
8025 pte_p = PT_ENTRY_NULL;
8026 if (pve_p != PV_ENTRY_NULL) {
8027 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
8028 }
8029 }
8030 if (tlb_flush_needed) {
8031 sync_tlb_flush();
8032 }
8033 return result;
8034 }
8035
8036 /*
8037 * Determine if the fault was induced by software tracking of
8038 * modify/reference bits. If so, re-enable the mapping (and set
8039 * the appropriate bits).
8040 *
8041 * Returns KERN_SUCCESS if the fault was induced and was
8042 * successfully handled.
8043 *
8044 * Returns KERN_FAILURE if the fault was not induced, or if
8045 * the function was unable to deal with it.
8046 *
8047 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
8048 * disallows this type of access.
8049 */
8050 MARK_AS_PMAP_TEXT static kern_return_t
8051 arm_fast_fault_internal(
8052 pmap_t pmap,
8053 vm_map_address_t va,
8054 vm_prot_t fault_type,
8055 __unused boolean_t from_user)
8056 {
8057 kern_return_t result = KERN_FAILURE;
8058 pt_entry_t *ptep;
8059 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
8060 int pai;
8061 pmap_paddr_t pa;
8062
8063 VALIDATE_PMAP(pmap);
8064
8065 PMAP_LOCK(pmap);
8066
8067 /*
8068 * If the entry doesn't exist, is completely invalid, or is already
8069 * valid, we can't fix it here.
8070 */
8071
8072 ptep = pmap_pte(pmap, va);
8073 if (ptep != PT_ENTRY_NULL) {
8074 spte = *ptep;
8075
8076 pa = pte_to_pa(spte);
8077
8078 if ((spte == ARM_PTE_TYPE_FAULT) ||
8079 ARM_PTE_IS_COMPRESSED(spte)) {
8080 PMAP_UNLOCK(pmap);
8081 return result;
8082 }
8083
8084 if (!pa_valid(pa)) {
8085 PMAP_UNLOCK(pmap);
8086 return result;
8087 }
8088 pai = (int)pa_index(pa);
8089 LOCK_PVH(pai);
8090 } else {
8091 PMAP_UNLOCK(pmap);
8092 return result;
8093 }
8094
8095
8096 if ((IS_REFFAULT_PAGE(pai)) ||
8097 ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
8098 /*
8099 * An attempted access will always clear ref/mod fault state, as
8100 * appropriate for the fault type. arm_clear_fast_fault will
8101 * update the associated PTEs for the page as appropriate; if
8102 * any PTEs are updated, we redrive the access. If the mapping
8103 * does not actually allow for the attempted access, the
8104 * following fault will (hopefully) fail to update any PTEs, and
8105 * thus cause arm_fast_fault to decide that it failed to handle
8106 * the fault.
8107 */
8108 if (IS_REFFAULT_PAGE(pai)) {
8109 CLR_REFFAULT_PAGE(pai);
8110 }
8111 if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
8112 CLR_MODFAULT_PAGE(pai);
8113 }
8114
8115 if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
8116 /*
8117 * Should this preserve KERN_PROTECTION_FAILURE? The
8118 * cost of not doing so is another fault in a case
8119 * that should already result in an exception.
8120 */
8121 result = KERN_SUCCESS;
8122 }
8123 }
8124
8125 UNLOCK_PVH(pai);
8126 PMAP_UNLOCK(pmap);
8127 return result;
8128 }
8129
8130 kern_return_t
8131 arm_fast_fault(
8132 pmap_t pmap,
8133 vm_map_address_t va,
8134 vm_prot_t fault_type,
8135 __unused boolean_t from_user)
8136 {
8137 kern_return_t result = KERN_FAILURE;
8138
8139 if (va < pmap->min || va >= pmap->max) {
8140 return result;
8141 }
8142
8143 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
8144 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
8145 from_user);
8146
8147 #if (__ARM_VMSA__ == 7)
8148 if (pmap != kernel_pmap) {
8149 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
8150 pmap_t cur_pmap;
8151 pmap_t cur_user_pmap;
8152
8153 cur_pmap = current_pmap();
8154 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
8155
8156 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
8157 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
8158 pmap_set_pmap(pmap, current_thread());
8159 result = KERN_SUCCESS;
8160 goto done;
8161 }
8162 }
8163 }
8164 #endif
8165
8166 result = arm_fast_fault_internal(pmap, va, fault_type, from_user);
8167
8168 #if (__ARM_VMSA__ == 7)
8169 done:
8170 #endif
8171
8172 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
8173
8174 return result;
8175 }
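/*
 * Illustrative sketch, not part of the original source: how a data-abort
 * handler might consult arm_fast_fault() before handing the fault to the
 * VM layer.  The "example_" name and parameters are hypothetical.
 */
static boolean_t __unused
example_try_fast_fault(pmap_t map, vm_map_address_t far, boolean_t is_write, boolean_t from_user)
{
	vm_prot_t fault_type = VM_PROT_READ | (is_write ? VM_PROT_WRITE : VM_PROT_NONE);

	/* KERN_SUCCESS means the PTE was repaired; the access can simply be retried. */
	return arm_fast_fault(map, far, fault_type, from_user) == KERN_SUCCESS;
}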
8176
8177 void
8178 pmap_copy_page(
8179 ppnum_t psrc,
8180 ppnum_t pdst)
8181 {
8182 bcopy_phys((addr64_t) (ptoa(psrc)),
8183 (addr64_t) (ptoa(pdst)),
8184 PAGE_SIZE);
8185 }
8186
8187
8188 /*
8189 * pmap_copy_part_page copies part of the specified (machine independent) pages.
8190 */
8191 void
8192 pmap_copy_part_page(
8193 ppnum_t psrc,
8194 vm_offset_t src_offset,
8195 ppnum_t pdst,
8196 vm_offset_t dst_offset,
8197 vm_size_t len)
8198 {
8199 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
8200 (addr64_t) (ptoa(pdst) + dst_offset),
8201 len);
8202 }
8203
8204
8205 /*
8206 * pmap_zero_page zeros the specified (machine independent) page.
8207 */
8208 void
8209 pmap_zero_page(
8210 ppnum_t pn)
8211 {
8212 assert(pn != vm_page_fictitious_addr);
8213 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
8214 }
8215
8216 /*
8217 * pmap_zero_part_page
8218 * zeros the specified (machine independent) part of a page.
8219 */
8220 void
8221 pmap_zero_part_page(
8222 ppnum_t pn,
8223 vm_offset_t offset,
8224 vm_size_t len)
8225 {
8226 assert(pn != vm_page_fictitious_addr);
8227 assert(offset + len <= PAGE_SIZE);
8228 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
8229 }
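/*
 * Illustrative sketch, not part of the original source: zeroing the unused
 * tail of a page after its first "valid" bytes have been filled in.  The
 * "example_" name is hypothetical.
 */
static void __unused
example_zero_page_tail(ppnum_t pn, vm_size_t valid)
{
	assert(valid <= PAGE_SIZE);
	pmap_zero_part_page(pn, valid, PAGE_SIZE - valid);
}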
8230
8231
8232 /*
8233 * No-op in the current ARM implementation.
8234 */
8235 void
8236 inval_copy_windows(
8237 __unused thread_t t)
8238 {
8239 }
8240
8241 void
8242 pmap_map_globals(
8243 void)
8244 {
8245 pt_entry_t *ptep, pte;
8246
8247 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
8248 assert(ptep != PT_ENTRY_NULL);
8249 assert(*ptep == ARM_PTE_EMPTY);
8250
8251 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
8252 #if __ARM_KERNEL_PROTECT__
8253 pte |= ARM_PTE_NG;
8254 #endif /* __ARM_KERNEL_PROTECT__ */
8255 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
8256 #if (__ARM_VMSA__ > 7)
8257 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8258 #else
8259 pte |= ARM_PTE_SH;
8260 #endif
8261 *ptep = pte;
8262 FLUSH_PTE_RANGE(ptep, (ptep + 1));
8263 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE);
8264 }
8265
8266 vm_offset_t
8267 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
8268 {
8269 if (__improbable(index >= CPUWINDOWS_MAX)) {
8270 panic("%s: invalid index %u", __func__, index);
8271 }
8272 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
8273 }
8274
8275 MARK_AS_PMAP_TEXT static unsigned int
8276 pmap_map_cpu_windows_copy_internal(
8277 ppnum_t pn,
8278 vm_prot_t prot,
8279 unsigned int wimg_bits)
8280 {
8281 pt_entry_t *ptep = NULL, pte;
8282 unsigned int cpu_num;
8283 unsigned int i;
8284 vm_offset_t cpu_copywindow_vaddr = 0;
8285
8286 cpu_num = pmap_get_cpu_data()->cpu_number;
8287
8288 for (i = 0; i < CPUWINDOWS_MAX; i++) {
8289 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
8290 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8291 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
8292 if (*ptep == ARM_PTE_TYPE_FAULT) {
8293 break;
8294 }
8295 }
8296 if (i == CPUWINDOWS_MAX) {
8297 panic("pmap_map_cpu_windows_copy: out of window\n");
8298 }
8299
8300 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
8301 #if __ARM_KERNEL_PROTECT__
8302 pte |= ARM_PTE_NG;
8303 #endif /* __ARM_KERNEL_PROTECT__ */
8304
8305 pte |= wimg_to_pte(wimg_bits);
8306
8307 if (prot & VM_PROT_WRITE) {
8308 pte |= ARM_PTE_AP(AP_RWNA);
8309 } else {
8310 pte |= ARM_PTE_AP(AP_RONA);
8311 }
8312
8313 WRITE_PTE_FAST(ptep, pte);
8314 /*
8315 * Invalidate the TLB. Nested use of cpu_copywindow_vaddr by an interrupted context
8316 * is covered in pmap_unmap_cpu_windows_copy(), in the window after the PTE is cleared
8316 * and before its TLB invalidate completes.
8317 */
8318 FLUSH_PTE_STRONG(ptep);
8319 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE);
8320
8321 return i;
8322 }
8323
8324 unsigned int
8325 pmap_map_cpu_windows_copy(
8326 ppnum_t pn,
8327 vm_prot_t prot,
8328 unsigned int wimg_bits)
8329 {
8330 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
8331 }
8332
8333 MARK_AS_PMAP_TEXT static void
8334 pmap_unmap_cpu_windows_copy_internal(
8335 unsigned int index)
8336 {
8337 pt_entry_t *ptep;
8338 unsigned int cpu_num;
8339 vm_offset_t cpu_copywindow_vaddr = 0;
8340
8341 cpu_num = pmap_get_cpu_data()->cpu_number;
8342
8343 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
8344 /* Issue full-system DSB to ensure prior operations on the per-CPU window
8345 * (which are likely to have been on I/O memory) are complete before
8346 * tearing down the mapping. */
8347 __builtin_arm_dsb(DSB_SY);
8348 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8349 WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
8350 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE);
8351 }
8352
8353 void
8354 pmap_unmap_cpu_windows_copy(
8355 unsigned int index)
8356 {
8357 return pmap_unmap_cpu_windows_copy_internal(index);
8358 }
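/*
 * Illustrative sketch, not part of the original source: the map/use/unmap
 * pattern for a per-CPU copy window.  The window belongs to the current CPU,
 * so the caller is expected to keep preemption disabled for the duration.
 * The "example_" name is hypothetical.
 */
static uint32_t __unused
example_peek_word(ppnum_t pn, vm_offset_t offset, unsigned int wimg_bits)
{
	unsigned int index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ, wimg_bits);
	vm_offset_t va = pmap_cpu_windows_copy_addr(pmap_get_cpu_data()->cpu_number, index);
	uint32_t value;

	assert(offset <= PAGE_SIZE - sizeof(uint32_t));
	value = *(volatile uint32_t *)(va + offset);
	pmap_unmap_cpu_windows_copy(index);
	return value;
}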
8359
8360 /*
8361 * Indicate that a pmap is intended to be used as a nested pmap
8362 * within one or more larger address spaces. This must be set
8363 * before pmap_nest() is called with this pmap as the 'subordinate'.
8364 */
8365 MARK_AS_PMAP_TEXT static void
8366 pmap_set_nested_internal(
8367 pmap_t pmap)
8368 {
8369 VALIDATE_PMAP(pmap);
8370 pmap->nested = TRUE;
8371 }
8372
8373 void
8374 pmap_set_nested(
8375 pmap_t pmap)
8376 {
8377 pmap_set_nested_internal(pmap);
8378 }
8379
8380 /*
8381 * pmap_trim_range(pmap, start, end)
8382 *
8383 * pmap = pmap to operate on
8384 * start = start of the range
8385 * end = end of the range
8386 *
8387 * Attempts to deallocate TTEs for the given range, which must lie within the pmap's nested region.
8388 */
8389 MARK_AS_PMAP_TEXT static void
8390 pmap_trim_range(
8391 pmap_t pmap,
8392 addr64_t start,
8393 addr64_t end)
8394 {
8395 addr64_t cur;
8396 addr64_t nested_region_start;
8397 addr64_t nested_region_end;
8398 addr64_t adjusted_start;
8399 addr64_t adjusted_end;
8400 addr64_t adjust_offmask;
8401 tt_entry_t * tte_p;
8402 pt_entry_t * pte_p;
8403
8404 if (__improbable(end < start)) {
8405 panic("%s: invalid address range, "
8406 "pmap=%p, start=%p, end=%p",
8407 __func__,
8408 pmap, (void*)start, (void*)end);
8409 }
8410
8411 nested_region_start = pmap->nested_region_subord_addr;
8412 nested_region_end = nested_region_start + pmap->nested_region_size;
8413
8414 if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
8415 panic("%s: range outside nested region %p-%p, "
8416 "pmap=%p, start=%p, end=%p",
8417 __func__, (void *)nested_region_start, (void *)nested_region_end,
8418 pmap, (void*)start, (void*)end);
8419 }
8420
8421 /* Contract the range to TT page boundaries. */
8422 #if (__ARM_VMSA__ > 7)
8423 adjust_offmask = ARM_TT_TWIG_OFFMASK;
8424 #else /* (__ARM_VMSA__ > 7) */
8425 adjust_offmask = ((ARM_TT_TWIG_SIZE * 4) - 1);
8426 #endif /* (__ARM_VMSA__ > 7) */
8427
8428 adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
8429 adjusted_end = end & ~adjust_offmask;
8430
8431 /* Iterate over the range, trying to remove TTEs. */
8432 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_TWIG_SIZE) {
8433 bool modified = false;
8434
8435 PMAP_LOCK(pmap);
8436
8437 tte_p = pmap_tte(pmap, cur);
8438
8439 if (tte_p == (tt_entry_t *) NULL) {
8440 goto done;
8441 }
8442
8443 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
8444 pte_p = (pt_entry_t *) ttetokv(*tte_p);
8445
8446 #if (__ARM_VMSA__ == 7)
8447 if ((ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
8448 (pmap != kernel_pmap)) {
8449 if (pmap->nested == TRUE) {
8450 /* Deallocate for the nested map. */
8451 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
8452 } else {
8453 /* Just remove for the parent map. */
8454 pmap_tte_remove(pmap, tte_p, PMAP_TT_L1_LEVEL);
8455 }
8456
8457 flush_mmu_tlb_entry((cur & ~ARM_TT_L1_OFFMASK) | (pmap->asid & 0xff));
8458 modified = true;
8459 }
8460 #else
8461 if ((ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
8462 (pmap != kernel_pmap)) {
8463 if (pmap->nested == TRUE) {
8464 /* Deallocate for the nested map. */
8465 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
8466 } else {
8467 /* Just remove for the parent map. */
8468 pmap_tte_remove(pmap, tte_p, PMAP_TT_L2_LEVEL);
8469 }
8470
8471 flush_mmu_tlb_entry(tlbi_addr(cur & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
8472 modified = true;
8473 }
8474 #endif
8475 }
8476
8477 done:
8478 PMAP_UNLOCK(pmap);
8479
8480 if (modified) {
8481 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE);
8482 }
8483 }
8484
8485 #if (__ARM_VMSA__ > 7)
8486 /* Remove empty L2 TTs. */
8487 adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
8488 adjusted_end = end & ~ARM_TT_L1_OFFMASK;
8489
8490 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
8491 /* For each L1 entry in our range... */
8492 PMAP_LOCK(pmap);
8493
8494 bool remove_tt1e = true;
8495 tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
8496 tt_entry_t * tt2e_start;
8497 tt_entry_t * tt2e_end;
8498 tt_entry_t * tt2e_p;
8499 tt_entry_t tt1e;
8500
8501 if (tt1e_p == NULL) {
8502 PMAP_UNLOCK(pmap);
8503 continue;
8504 }
8505
8506 tt1e = *tt1e_p;
8507
8508 if (tt1e == ARM_TTE_TYPE_FAULT) {
8509 PMAP_UNLOCK(pmap);
8510 continue;
8511 }
8512
8513 tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
8514 tt2e_end = &tt2e_start[TTE_PGENTRIES];
8515
8516 for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
8517 if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
8518 /*
8519 * If any TTEs are populated, don't remove the
8520 * L1 TT.
8521 */
8522 remove_tt1e = false;
8523 }
8524 }
8525
8526 if (remove_tt1e) {
8527 pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
8528 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE);
8529 }
8530
8531 PMAP_UNLOCK(pmap);
8532 }
8533 #endif /* (__ARM_VMSA__ > 7) */
8534 }
8535
8536 /*
8537 * pmap_trim_internal(grand, subord, vstart, nstart, size)
8538 *
8539 * grand = pmap subord is nested in
8540 * subord = nested pmap
8541 * vstart = start of the used range in grand
8542 * nstart = start of the used range in subord
8543 * size = size of the used range
8544 *
8545 * Attempts to trim the shared region page tables down to only cover the given
8546 * range in subord and grand.
8547 */
8548 MARK_AS_PMAP_TEXT static void
8549 pmap_trim_internal(
8550 pmap_t grand,
8551 pmap_t subord,
8552 addr64_t vstart,
8553 addr64_t nstart,
8554 uint64_t size)
8555 {
8556 addr64_t vend, nend;
8557 addr64_t adjust_offmask;
8558
8559 if (__improbable(os_add_overflow(vstart, size, &vend))) {
8560 panic("%s: grand addr wraps around, "
8561 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8562 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8563 }
8564
8565 if (__improbable(os_add_overflow(nstart, size, &nend))) {
8566 panic("%s: nested addr wraps around, "
8567 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8568 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8569 }
8570
8571 VALIDATE_PMAP(grand);
8572 VALIDATE_PMAP(subord);
8573
8574 PMAP_LOCK(subord);
8575
8576 if (!subord->nested) {
8577 panic("%s: subord is not nestable, "
8578 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8579 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8580 }
8581
8582 if (grand->nested) {
8583 panic("%s: grand is nestable, "
8584 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8585 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8586 }
8587
8588 if (grand->nested_pmap != subord) {
8589 panic("%s: grand->nested != subord, "
8590 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8591 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8592 }
8593
8594 if (size != 0) {
8595 if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
8596 panic("%s: grand range not in nested region, "
8597 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8598 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8599 }
8600
8601 if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
8602 panic("%s: subord range not in nested region, "
8603 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8604 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8605 }
8606 }
8607
8608
8609 if (!grand->nested_has_no_bounds_ref) {
8610 assert(subord->nested_bounds_set);
8611
8612 if (!grand->nested_bounds_set) {
8613 /* Inherit the bounds from subord. */
8614 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8615 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8616 grand->nested_bounds_set = true;
8617 }
8618
8619 PMAP_UNLOCK(subord);
8620 return;
8621 }
8622
8623 if ((!subord->nested_bounds_set) && size) {
8624 #if (__ARM_VMSA__ > 7)
8625 adjust_offmask = ARM_TT_TWIG_OFFMASK;
8626 #else /* (__ARM_VMSA__ > 7) */
8627 adjust_offmask = ((ARM_TT_TWIG_SIZE * 4) - 1);
8628 #endif /* (__ARM_VMSA__ > 7) */
8629
8630 subord->nested_region_true_start = nstart;
8631 subord->nested_region_true_end = nend;
8632 subord->nested_region_true_start &= ~adjust_offmask;
8633
8634 if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
8635 panic("%s: padded true end wraps around, "
8636 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8637 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8638 }
8639
8640 subord->nested_region_true_end &= ~adjust_offmask;
8641 subord->nested_bounds_set = true;
8642 }
8643
8644 if (subord->nested_bounds_set) {
8645 /* Inherit the bounds from subord. */
8646 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8647 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8648 grand->nested_bounds_set = true;
8649
8650 /* If we know the bounds, we can trim the pmap. */
8651 grand->nested_has_no_bounds_ref = false;
8652 PMAP_UNLOCK(subord);
8653 } else {
8654 /* Don't trim if we don't know the bounds. */
8655 PMAP_UNLOCK(subord);
8656 return;
8657 }
8658
8659 /* Trim grand to only cover the given range. */
8660 pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
8661 pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));
8662
8663 /* Try to trim subord. */
8664 pmap_trim_subord(subord);
8665 }
8666
8667 MARK_AS_PMAP_TEXT static void
8668 pmap_trim_self(pmap_t pmap)
8669 {
8670 if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
8671 /* If we have a no bounds ref, we need to drop it. */
8672 PMAP_LOCK(pmap->nested_pmap);
8673 pmap->nested_has_no_bounds_ref = false;
8674 boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
8675 vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
8676 vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
8677 PMAP_UNLOCK(pmap->nested_pmap);
8678
8679 if (nested_bounds_set) {
8680 pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
8681 pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
8682 }
8683 /*
8684 * Try trimming the nested pmap, in case we had the
8685 * last reference.
8686 */
8687 pmap_trim_subord(pmap->nested_pmap);
8688 }
8689 }
8690
8691 /*
8692 * pmap_trim_subord(subord)
8693 *
8694 * subord = nested pmap we are attempting to trim, after dropping
8695 * one of its outstanding "no bounds" references
8696 *
8697 * Trims subord if possible
8698 */
8699 MARK_AS_PMAP_TEXT static void
8700 pmap_trim_subord(pmap_t subord)
8701 {
8702 bool contract_subord = false;
8703
8704 PMAP_LOCK(subord);
8705
8706 subord->nested_no_bounds_refcnt--;
8707
8708 if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
8709 /* If this was the last no bounds reference, trim subord. */
8710 contract_subord = true;
8711 }
8712
8713 PMAP_UNLOCK(subord);
8714
8715 if (contract_subord) {
8716 pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
8717 pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
8718 }
8719 }
8720
8721 void
8722 pmap_trim(
8723 pmap_t grand,
8724 pmap_t subord,
8725 addr64_t vstart,
8726 addr64_t nstart,
8727 uint64_t size)
8728 {
8729 pmap_trim_internal(grand, subord, vstart, nstart, size);
8730 }
8731
8732 /*
8733 * kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
8734 *
8735 * grand = the pmap that we will nest subord into
8736 * subord = the pmap that will be nested into grand
8737 * vstart = start of the range in grand to be nested
8738 * nstart = start of the corresponding range in subord
8739 * size = size of the nested area (up to 16TB)
8740 *
8741 * Inserts a pmap into another. This is used to implement shared segments.
8742 *
8743 */
8744
8745 MARK_AS_PMAP_TEXT static kern_return_t
8746 pmap_nest_internal(
8747 pmap_t grand,
8748 pmap_t subord,
8749 addr64_t vstart,
8750 addr64_t nstart,
8751 uint64_t size)
8752 {
8753 kern_return_t kr = KERN_FAILURE;
8754 vm_map_offset_t vaddr, nvaddr;
8755 tt_entry_t *stte_p;
8756 tt_entry_t *gtte_p;
8757 unsigned int i;
8758 unsigned int num_tte;
8759 unsigned int nested_region_asid_bitmap_size;
8760 unsigned int* nested_region_asid_bitmap;
8761 int expand_options = 0;
8762
8763 addr64_t vend, nend;
8764 if (__improbable(os_add_overflow(vstart, size, &vend))) {
8765 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
8766 }
8767 if (__improbable(os_add_overflow(nstart, size, &nend))) {
8768 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__, subord, nstart, size);
8769 }
8770 VALIDATE_PMAP(grand);
8771 VALIDATE_PMAP(subord);
8772
8773
8774 #if (__ARM_VMSA__ == 7)
8775 if (((size | vstart | nstart) & ARM_TT_L1_PT_OFFMASK) != 0x0ULL) {
8776 return KERN_INVALID_VALUE; /* Nest 4MB region */
8777 }
8778 #else
8779 if (((size | vstart | nstart) & (ARM_TT_L2_OFFMASK)) != 0x0ULL) {
8780 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
8781 }
8782 #endif
8783
8784 if (!subord->nested) {
8785 panic("%s: subordinate pmap %p is not nestable", __func__, subord);
8786 }
8787
8788 if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
8789 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
8790 }
8791
8792 if (subord->nested_region_asid_bitmap == NULL) {
8793 nested_region_asid_bitmap_size = (unsigned int)(size >> ARM_TT_TWIG_SHIFT) / (sizeof(unsigned int) * NBBY);
8794
8795 nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size * sizeof(unsigned int));
8796 bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
8797
8798 PMAP_LOCK(subord);
8799 if (subord->nested_region_asid_bitmap == NULL) {
8800 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
8801 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
8802 subord->nested_region_subord_addr = nstart;
8803 subord->nested_region_size = (mach_vm_offset_t) size;
8804 nested_region_asid_bitmap = NULL;
8805 }
8806 PMAP_UNLOCK(subord);
8807 if (nested_region_asid_bitmap != NULL) {
8808 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
8809 }
8810 }
8811 if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
8812 uint64_t new_size;
8813 unsigned int new_nested_region_asid_bitmap_size;
8814 unsigned int* new_nested_region_asid_bitmap;
8815
8816 nested_region_asid_bitmap = NULL;
8817 nested_region_asid_bitmap_size = 0;
8818 new_size = nend - subord->nested_region_subord_addr;
8819
8820 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
8821 new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> ARM_TT_TWIG_SHIFT) / (sizeof(unsigned int) * NBBY)) + 1;
8822
8823 new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size * sizeof(unsigned int));
8824 PMAP_LOCK(subord);
8825 if (subord->nested_region_size < new_size) {
8826 bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));
8827 bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
8828 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
8829 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
8830 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
8831 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
8832 subord->nested_region_size = new_size;
8833 new_nested_region_asid_bitmap = NULL;
8834 }
8835 PMAP_UNLOCK(subord);
8836 if (nested_region_asid_bitmap != NULL)
8837 { kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));}
8838 if (new_nested_region_asid_bitmap != NULL)
8839 { kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));}
8840 }
8841
8842 PMAP_LOCK(subord);
8843 if (grand->nested_pmap == PMAP_NULL) {
8844 grand->nested_pmap = subord;
8845
8846 if (!subord->nested_bounds_set) {
8847 /*
8848 * We are nesting without the shared region's bounds
8849 * being known. We'll have to trim the pmap later.
8850 */
8851 grand->nested_has_no_bounds_ref = true;
8852 subord->nested_no_bounds_refcnt++;
8853 }
8854
8855 grand->nested_region_grand_addr = vstart;
8856 grand->nested_region_subord_addr = nstart;
8857 grand->nested_region_size = (mach_vm_offset_t) size;
8858 } else {
8859 if ((grand->nested_region_grand_addr > vstart)) {
8860 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
8861 } else if ((grand->nested_region_grand_addr + grand->nested_region_size) < vend) {
8862 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
8863 }
8864 }
8865
8866 #if (__ARM_VMSA__ == 7)
8867 nvaddr = (vm_map_offset_t) nstart;
8868 vaddr = (vm_map_offset_t) vstart;
8869 num_tte = size >> ARM_TT_L1_SHIFT;
8870
8871 for (i = 0; i < num_tte; i++) {
8872 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8873 goto expand_next;
8874 }
8875
8876 stte_p = pmap_tte(subord, nvaddr);
8877 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
8878 PMAP_UNLOCK(subord);
8879 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);
8880
8881 if (kr != KERN_SUCCESS) {
8882 PMAP_LOCK(grand);
8883 goto done;
8884 }
8885
8886 PMAP_LOCK(subord);
8887 }
8888 PMAP_UNLOCK(subord);
8889 PMAP_LOCK(grand);
8890 stte_p = pmap_tte(grand, vaddr);
8891 if (stte_p == (tt_entry_t *)NULL) {
8892 PMAP_UNLOCK(grand);
8893 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
8894
8895 if (kr != KERN_SUCCESS) {
8896 PMAP_LOCK(grand);
8897 goto done;
8898 }
8899 } else {
8900 PMAP_UNLOCK(grand);
8901 kr = KERN_SUCCESS;
8902 }
8903 PMAP_LOCK(subord);
8904
8905 expand_next:
8906 nvaddr += ARM_TT_L1_SIZE;
8907 vaddr += ARM_TT_L1_SIZE;
8908 }
8909
8910 #else
8911 nvaddr = (vm_map_offset_t) nstart;
8912 num_tte = (unsigned int)(size >> ARM_TT_L2_SHIFT);
8913
8914 for (i = 0; i < num_tte; i++) {
8915 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8916 goto expand_next;
8917 }
8918
8919 stte_p = pmap_tt2e(subord, nvaddr);
8920 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
8921 PMAP_UNLOCK(subord);
8922 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L3_LEVEL);
8923
8924 if (kr != KERN_SUCCESS) {
8925 PMAP_LOCK(grand);
8926 goto done;
8927 }
8928
8929 PMAP_LOCK(subord);
8930 }
8931 expand_next:
8932 nvaddr += ARM_TT_L2_SIZE;
8933 }
8934 #endif
8935 PMAP_UNLOCK(subord);
8936
8937 /*
8938 * copy TTEs from the subord pmap into the grand pmap
8939 */
8940
8941 PMAP_LOCK(grand);
8942 nvaddr = (vm_map_offset_t) nstart;
8943 vaddr = (vm_map_offset_t) vstart;
8944
8945
8946 #if (__ARM_VMSA__ == 7)
8947 for (i = 0; i < num_tte; i++) {
8948 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8949 goto nest_next;
8950 }
8951
8952 stte_p = pmap_tte(subord, nvaddr);
8953 gtte_p = pmap_tte(grand, vaddr);
8954 *gtte_p = *stte_p;
8955
8956 nest_next:
8957 nvaddr += ARM_TT_L1_SIZE;
8958 vaddr += ARM_TT_L1_SIZE;
8959 }
8960 #else
8961 for (i = 0; i < num_tte; i++) {
8962 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8963 goto nest_next;
8964 }
8965
8966 stte_p = pmap_tt2e(subord, nvaddr);
8967 gtte_p = pmap_tt2e(grand, vaddr);
8968 if (gtte_p == PT_ENTRY_NULL) {
8969 PMAP_UNLOCK(grand);
8970 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L2_LEVEL);
8971 PMAP_LOCK(grand);
8972
8973 if (kr != KERN_SUCCESS) {
8974 goto done;
8975 }
8976
8977 gtte_p = pmap_tt2e(grand, vaddr);
8978 }
8979 *gtte_p = *stte_p;
8980
8981 nest_next:
8982 vaddr += ARM_TT_L2_SIZE;
8983 nvaddr += ARM_TT_L2_SIZE;
8984 }
8985 #endif
8986
8987 kr = KERN_SUCCESS;
8988 done:
8989
8990 stte_p = pmap_tte(grand, vstart);
8991 FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
8992
8993 #if (__ARM_VMSA__ > 7)
8994 /*
8995 * check for overflow on LP64 arch
8996 */
8997 assert((size & 0xFFFFFFFF00000000ULL) == 0);
8998 #endif
8999 PMAP_UPDATE_TLBS(grand, vstart, vend);
9000
9001 PMAP_UNLOCK(grand);
9002 return kr;
9003 }
9004
9005 kern_return_t
9006 pmap_nest(
9007 pmap_t grand,
9008 pmap_t subord,
9009 addr64_t vstart,
9010 addr64_t nstart,
9011 uint64_t size)
9012 {
9013 kern_return_t kr = KERN_FAILURE;
9014
9015 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
9016 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
9017 VM_KERNEL_ADDRHIDE(vstart));
9018
9019 kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
9020
9021 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
9022
9023 return kr;
9024 }
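/*
 * Illustrative sketch, not part of the original source: nesting a shared
 * region pmap into a task pmap and later trimming it once the in-use bounds
 * are known.  The base/size constants come from <mach/shared_region.h>; the
 * 64MB trim size and the "example_" name are hypothetical.
 */
static void __unused
example_nest_shared_region(pmap_t task_pmap, pmap_t shared_region_pmap)
{
	addr64_t base = SHARED_REGION_BASE_ARM64;
	uint64_t size = SHARED_REGION_SIZE_ARM64;

	pmap_set_nested(shared_region_pmap);
	(void) pmap_nest(task_pmap, shared_region_pmap, base, base, size);

	/* Once it is known that only the first 64MB of the region is in use: */
	pmap_trim(task_pmap, shared_region_pmap, base, base, 64ULL << 20);
}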
9025
9026 /*
9027 * kern_return_t pmap_unnest(grand, vaddr, size)
9028 *
9029 * grand = the pmap that will have the virtual range unnested
9030 * vaddr = start of range in pmap to be unnested
9031 * size = size of range in pmap to be unnested
9032 *
9033 */
9034
9035 kern_return_t
9036 pmap_unnest(
9037 pmap_t grand,
9038 addr64_t vaddr,
9039 uint64_t size)
9040 {
9041 return pmap_unnest_options(grand, vaddr, size, 0);
9042 }
9043
9044 MARK_AS_PMAP_TEXT static kern_return_t
9045 pmap_unnest_options_internal(
9046 pmap_t grand,
9047 addr64_t vaddr,
9048 uint64_t size,
9049 unsigned int option)
9050 {
9051 vm_map_offset_t start;
9052 vm_map_offset_t addr;
9053 tt_entry_t *tte_p;
9054 unsigned int current_index;
9055 unsigned int start_index;
9056 unsigned int max_index;
9057 unsigned int num_tte;
9058 unsigned int i;
9059
9060 addr64_t vend;
9061 if (__improbable(os_add_overflow(vaddr, size, &vend))) {
9062 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
9063 }
9064
9065 VALIDATE_PMAP(grand);
9066
9067 #if (__ARM_VMSA__ == 7)
9068 if (((size | vaddr) & ARM_TT_L1_PT_OFFMASK) != 0x0ULL) {
9069 panic("pmap_unnest(): unaligned request\n");
9070 }
9071 #else
9072 if (((size | vaddr) & ARM_TT_L2_OFFMASK) != 0x0ULL) {
9073 panic("pmap_unnest(): unaligned request\n");
9074 }
9075 #endif
9076
9077 if ((option & PMAP_UNNEST_CLEAN) == 0) {
9078 if (grand->nested_pmap == NULL) {
9079 panic("%s: %p has no nested pmap", __func__, grand);
9080 }
9081
9082 if ((vaddr < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
9083 panic("%s: %p: unnest request for region [%p, %p) that is not fully nested", __func__, grand, (void*)vaddr, (void*)vend);
9084 }
9085
9086 PMAP_LOCK(grand->nested_pmap);
9087
9088 start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr;
9089 start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> ARM_TT_TWIG_SHIFT);
9090 max_index = (unsigned int)(start_index + (size >> ARM_TT_TWIG_SHIFT));
9091 num_tte = (unsigned int)(size >> ARM_TT_TWIG_SHIFT);
9092
9093 for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += ARM_TT_TWIG_SIZE) {
9094 pt_entry_t *bpte, *epte, *cpte;
9095
9096 if (addr < grand->nested_pmap->nested_region_true_start) {
9097 /* We haven't reached the interesting range. */
9098 continue;
9099 }
9100
9101 if (addr >= grand->nested_pmap->nested_region_true_end) {
9102 /* We're done with the interesting range. */
9103 break;
9104 }
9105
9106 bpte = pmap_pte(grand->nested_pmap, addr);
9107 epte = bpte + (ARM_TT_LEAF_INDEX_MASK >> ARM_TT_LEAF_SHIFT);
9108
9109 if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
9110 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
9111
9112 for (cpte = bpte; cpte <= epte; cpte++) {
9113 pmap_paddr_t pa;
9114 int pai = 0;
9115 boolean_t managed = FALSE;
9116 pt_entry_t spte;
9117
9118 if ((*cpte != ARM_PTE_TYPE_FAULT)
9119 && (!ARM_PTE_IS_COMPRESSED(*cpte))) {
9120 spte = *cpte;
9121 while (!managed) {
9122 pa = pte_to_pa(spte);
9123 if (!pa_valid(pa)) {
9124 break;
9125 }
9126 pai = (int)pa_index(pa);
9127 LOCK_PVH(pai);
9128 spte = *cpte;
9129 pa = pte_to_pa(spte);
9130 if (pai == (int)pa_index(pa)) {
9131 managed = TRUE;
9132 break; // Leave the PVH locked as we'll unlock it after we update the PTE
9133 }
9134 UNLOCK_PVH(pai);
9135 }
9136
9137 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
9138 WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
9139 }
9140
9141 if (managed) {
9142 ASSERT_PVH_LOCKED(pai);
9143 UNLOCK_PVH(pai);
9144 }
9145 }
9146 }
9147 }
9148
9149 FLUSH_PTE_RANGE_STRONG(bpte, epte);
9150 flush_mmu_tlb_region_asid_async(start, (unsigned)size, grand->nested_pmap);
9151 }
9152
9153 sync_tlb_flush();
9154
9155 PMAP_UNLOCK(grand->nested_pmap);
9156 }
9157
9158 PMAP_LOCK(grand);
9159
9160 /*
9161 * invalidate all TTEs for the segment at vaddr in pmap grand
9162 */
9163 start = vaddr;
9164 addr = vaddr;
9165
9166 num_tte = (unsigned int)(size >> ARM_TT_TWIG_SHIFT);
9167
9168 for (i = 0; i < num_tte; i++, addr += ARM_TT_TWIG_SIZE) {
9169 if (addr < grand->nested_pmap->nested_region_true_start) {
9170 /* We haven't reached the interesting range. */
9171 continue;
9172 }
9173
9174 if (addr >= grand->nested_pmap->nested_region_true_end) {
9175 /* We're done with the interesting range. */
9176 break;
9177 }
9178
9179 tte_p = pmap_tte(grand, addr);
9180 *tte_p = ARM_TTE_TYPE_FAULT;
9181 }
9182
9183 tte_p = pmap_tte(grand, start);
9184 FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
9185 PMAP_UPDATE_TLBS(grand, start, vend);
9186
9187 PMAP_UNLOCK(grand);
9188
9189 return KERN_SUCCESS;
9190 }
9191
9192 kern_return_t
9193 pmap_unnest_options(
9194 pmap_t grand,
9195 addr64_t vaddr,
9196 uint64_t size,
9197 unsigned int option)
9198 {
9199 kern_return_t kr = KERN_FAILURE;
9200
9201 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
9202 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
9203
9204 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
9205
9206 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
9207
9208 return kr;
9209 }
9210
9211 boolean_t
9212 pmap_adjust_unnest_parameters(
9213 __unused pmap_t p,
9214 __unused vm_map_offset_t *s,
9215 __unused vm_map_offset_t *e)
9216 {
9217 return TRUE; /* to get to log_unnest_badness()... */
9218 }
9219
9220 /*
9221 * disable no-execute capability on
9222 * the specified pmap
9223 */
9224 #if DEVELOPMENT || DEBUG
9225 void
9226 pmap_disable_NX(
9227 pmap_t pmap)
9228 {
9229 pmap->nx_enabled = FALSE;
9230 }
9231 #else
9232 void
9233 pmap_disable_NX(
9234 __unused pmap_t pmap)
9235 {
9236 }
9237 #endif
9238
9239 void
9240 pt_fake_zone_init(
9241 int zone_index)
9242 {
9243 pt_fake_zone_index = zone_index;
9244 }
9245
9246 void
9247 pt_fake_zone_info(
9248 int *count,
9249 vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
9250 uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
9251 {
9252 *count = inuse_pmap_pages_count;
9253 *cur_size = PAGE_SIZE * (inuse_pmap_pages_count);
9254 *max_size = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
9255 *elem_size = PAGE_SIZE;
9256 *alloc_size = PAGE_SIZE;
9257 *sum_size = (alloc_pmap_pages_count) * PAGE_SIZE;
9258
9259 *collectable = 1;
9260 *exhaustable = 0;
9261 *caller_acct = 1;
9262 }
9263
9264 /*
9265 * flush a range of hardware TLB entries.
9266 * NOTE: assumes the smallest TLB entry in use will be for
9267 * an ARM small page (4K).
9268 */
9269
9270 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
9271 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
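/*
 * Past these per-call page counts, flush_mmu_tlb_region_asid_async() falls
 * back to invalidating the whole ASID (or the entire TLB for the kernel and
 * for nested pmaps) instead of issuing per-page invalidates.
 */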
9272
9273 static void
9274 flush_mmu_tlb_region_asid_async(
9275 vm_offset_t va,
9276 unsigned length,
9277 pmap_t pmap)
9278 {
9279 #if (__ARM_VMSA__ == 7)
9280 vm_offset_t end = va + length;
9281 uint32_t asid;
9282
9283 asid = pmap->asid;
9284
9285 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
9286 boolean_t flush_all = FALSE;
9287
9288 if ((asid == 0) || (pmap->nested == TRUE)) {
9289 flush_all = TRUE;
9290 }
9291 if (flush_all) {
9292 flush_mmu_tlb_async();
9293 } else {
9294 flush_mmu_tlb_asid_async(asid);
9295 }
9296
9297 return;
9298 }
9299 if (pmap->nested == TRUE) {
9300 #if !__ARM_MP_EXT__
9301 flush_mmu_tlb();
9302 #else
9303 va = arm_trunc_page(va);
9304 while (va < end) {
9305 flush_mmu_tlb_mva_entries_async(va);
9306 va += ARM_SMALL_PAGE_SIZE;
9307 }
9308 #endif
9309 return;
9310 }
9311 va = arm_trunc_page(va) | (asid & 0xff);
9312 flush_mmu_tlb_entries_async(va, end);
9313
9314 #else
9315 vm_offset_t end = va + length;
9316 uint32_t asid;
9317
9318 asid = pmap->asid;
9319
9320 if ((length >> ARM_TT_L3_SHIFT) > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
9321 boolean_t flush_all = FALSE;
9322
9323 if ((asid == 0) || (pmap->nested == TRUE)) {
9324 flush_all = TRUE;
9325 }
9326 if (flush_all) {
9327 flush_mmu_tlb_async();
9328 } else {
9329 flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
9330 }
9331 return;
9332 }
9333 va = tlbi_asid(asid) | tlbi_addr(va);
9334 end = tlbi_asid(asid) | tlbi_addr(end);
9335 if (pmap->nested == TRUE) {
9336 flush_mmu_tlb_allentries_async(va, end);
9337 } else {
9338 flush_mmu_tlb_entries_async(va, end);
9339 }
9340
9341 #endif
9342 }
9343
9344 void
9345 flush_mmu_tlb_region(
9346 vm_offset_t va,
9347 unsigned length)
9348 {
9349 flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
9350 sync_tlb_flush();
9351 }
9352
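/*
 * Binary-search the sorted io_attr_table for the I/O range containing paddr
 * and return its WIMG attribute; default to VM_WIMG_IO when no range matches.
 */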
9353 static unsigned int
9354 pmap_find_io_attr(pmap_paddr_t paddr)
9355 {
9356 pmap_io_range_t find_range = {.addr = paddr, .len = PAGE_SIZE};
9357 unsigned int begin = 0, end = num_io_rgns - 1;
9358 assert(num_io_rgns > 0);
9359
9360 for (;;) {
9361 unsigned int middle = (begin + end) / 2;
9362 int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
9363 if (cmp == 0) {
9364 return io_attr_table[middle].wimg;
9365 } else if (begin == end) {
9366 break;
9367 } else if (cmp > 0) {
9368 begin = middle + 1;
9369 } else {
9370 end = middle;
9371 }
9372 }
9374
9375 return VM_WIMG_IO;
9376 }
9377
9378 unsigned int
9379 pmap_cache_attributes(
9380 ppnum_t pn)
9381 {
9382 pmap_paddr_t paddr;
9383 int pai;
9384 unsigned int result;
9385 pp_attr_t pp_attr_current;
9386
9387 paddr = ptoa(pn);
9388
9389 if ((paddr >= io_rgn_start) && (paddr < io_rgn_end)) {
9390 return pmap_find_io_attr(paddr);
9391 }
9392
9393 if (!pmap_initialized) {
9394 if ((paddr >= gPhysBase) && (paddr < gPhysBase + gPhysSize)) {
9395 return VM_WIMG_DEFAULT;
9396 } else {
9397 return VM_WIMG_IO;
9398 }
9399 }
9400
9401
9402 if (!pa_valid(paddr)) {
9403 return VM_WIMG_IO;
9404 }
9405
9406 result = VM_WIMG_DEFAULT;
9407
9408 pai = (int)pa_index(paddr);
9409
9410 pp_attr_current = pp_attr_table[pai];
9411 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9412 result = pp_attr_current & PP_ATTR_WIMG_MASK;
9413 }
9414 return result;
9415 }
9416
9417 MARK_AS_PMAP_TEXT static void
9418 pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
9419 {
9420 if ((wimg_bits_prev != wimg_bits_new)
9421 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
9422 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
9423 && (wimg_bits_new != VM_WIMG_COPYBACK))
9424 || ((wimg_bits_prev == VM_WIMG_WTHRU)
9425 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
9426 pmap_sync_page_attributes_phys(pn);
9427 }
9428
9429 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
9430 pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
9431 }
9432 }
9433
9434 MARK_AS_PMAP_TEXT static __unused void
9435 pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
9436 {
9437 pmap_paddr_t paddr = ptoa(pn);
9438 int pai = (int)pa_index(paddr);
9439
9440 if (__improbable(!pa_valid(paddr))) {
9441 panic("%s called on non-managed page 0x%08x", __func__, pn);
9442 }
9443
9444 LOCK_PVH(pai);
9445
9446
9447 pmap_update_cache_attributes_locked(pn, new_cacheattr);
9448
9449 UNLOCK_PVH(pai);
9450
9451 pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
9452 }
9453
9454 void *
9455 pmap_map_compressor_page(ppnum_t pn)
9456 {
9457 #if __ARM_PTE_PHYSMAP__
9458 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
9459 if (cacheattr != VM_WIMG_DEFAULT) {
9460 pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
9461 }
9462 #endif
9463 return (void*)phystokv(ptoa(pn));
9464 }
9465
9466 void
9467 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
9468 {
9469 #if __ARM_PTE_PHYSMAP__
9470 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
9471 if (cacheattr != VM_WIMG_DEFAULT) {
9472 pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
9473 }
9474 #endif
9475 }
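/*
 * Illustrative sketch, not part of the original source: how the compressor
 * layer might bracket access to a page through the physical aperture.  The
 * "example_" name is hypothetical.
 */
static void __unused
example_compressor_zero_page(ppnum_t pn)
{
	void *kva = pmap_map_compressor_page(pn);

	/* The page is now mapped with default (cacheable) attributes at kva. */
	bzero(kva, PAGE_SIZE);
	pmap_unmap_compressor_page(pn, kva);
}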
9476
9477 MARK_AS_PMAP_TEXT static boolean_t
9478 pmap_batch_set_cache_attributes_internal(
9479 ppnum_t pn,
9480 unsigned int cacheattr,
9481 unsigned int page_cnt,
9482 unsigned int page_index,
9483 boolean_t doit,
9484 unsigned int *res)
9485 {
9486 pmap_paddr_t paddr;
9487 int pai;
9488 pp_attr_t pp_attr_current;
9489 pp_attr_t pp_attr_template;
9490 unsigned int wimg_bits_prev, wimg_bits_new;
9491
9492 if (cacheattr & VM_WIMG_USE_DEFAULT) {
9493 cacheattr = VM_WIMG_DEFAULT;
9494 }
9495
9496 if ((doit == FALSE) && (*res == 0)) {
9497 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
9498 *res = page_cnt;
9499 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
9500 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
9501 return FALSE;
9502 }
9503 }
9504
9505 paddr = ptoa(pn);
9506
9507 if (!pa_valid(paddr)) {
9508 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
9509 }
9510
9511 pai = (int)pa_index(paddr);
9512
9513 if (doit) {
9514 LOCK_PVH(pai);
9515 }
9516
9517 do {
9518 pp_attr_current = pp_attr_table[pai];
9519 wimg_bits_prev = VM_WIMG_DEFAULT;
9520 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9521 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
9522 }
9523
9524 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
9525
9526 if (!doit) {
9527 break;
9528 }
9529
9530 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9531 * to avoid losing simultaneous updates to other bits like refmod. */
9532 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
9533
9534 wimg_bits_new = VM_WIMG_DEFAULT;
9535 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
9536 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
9537 }
9538
9539 if (doit) {
9540 if (wimg_bits_new != wimg_bits_prev) {
9541 pmap_update_cache_attributes_locked(pn, cacheattr);
9542 }
9543 UNLOCK_PVH(pai);
9544 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
9545 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
9546 }
9547 } else {
9548 if (wimg_bits_new == VM_WIMG_COPYBACK) {
9549 return FALSE;
9550 }
9551 if (wimg_bits_prev == wimg_bits_new) {
9552 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
9553 *res = *res - 1;
9554 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
9555 if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
9556 return FALSE;
9557 }
9558 }
9559 return TRUE;
9560 }
9561
9562 if (page_cnt == (page_index + 1)) {
9563 wimg_bits_prev = VM_WIMG_COPYBACK;
9564 if (((wimg_bits_prev != wimg_bits_new))
9565 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
9566 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
9567 && (wimg_bits_new != VM_WIMG_COPYBACK))
9568 || ((wimg_bits_prev == VM_WIMG_WTHRU)
9569 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
9570 platform_cache_flush_wimg(wimg_bits_new);
9571 }
9572 }
9573
9574 return TRUE;
9575 };
9576
9577 boolean_t
9578 pmap_batch_set_cache_attributes(
9579 ppnum_t pn,
9580 unsigned int cacheattr,
9581 unsigned int page_cnt,
9582 unsigned int page_index,
9583 boolean_t doit,
9584 unsigned int *res)
9585 {
9586 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
9587 }
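/*
 * Illustrative sketch, not part of the original source: one plausible
 * two-pass use of pmap_batch_set_cache_attributes(), inferred from its
 * doit/res parameters.  A dry run (doit == FALSE) sizes the batch and asks
 * the platform layer whether a batched cache operation is worthwhile; a
 * second pass (doit == TRUE) applies the new attributes.  The "example_"
 * name and the pages array are hypothetical.
 */
static void __unused
example_batch_set_wimg(ppnum_t *pages, unsigned int page_cnt, unsigned int cacheattr)
{
	unsigned int res = 0;
	unsigned int i;

	for (i = 0; i < page_cnt; i++) {
		if (!pmap_batch_set_cache_attributes(pages[i], cacheattr, page_cnt, i, FALSE, &res)) {
			/* The platform declined a batched cache operation for this WIMG setting. */
			break;
		}
	}
	for (i = 0; i < page_cnt; i++) {
		(void) pmap_batch_set_cache_attributes(pages[i], cacheattr, page_cnt, i, TRUE, &res);
	}
}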
9588
9589 MARK_AS_PMAP_TEXT static void
9590 pmap_set_cache_attributes_priv(
9591 ppnum_t pn,
9592 unsigned int cacheattr,
9593 boolean_t external __unused)
9594 {
9595 pmap_paddr_t paddr;
9596 int pai;
9597 pp_attr_t pp_attr_current;
9598 pp_attr_t pp_attr_template;
9599 unsigned int wimg_bits_prev, wimg_bits_new;
9600
9601 paddr = ptoa(pn);
9602
9603 if (!pa_valid(paddr)) {
9604 return; /* Not a managed page. */
9605 }
9606
9607 if (cacheattr & VM_WIMG_USE_DEFAULT) {
9608 cacheattr = VM_WIMG_DEFAULT;
9609 }
9610
9611 pai = (int)pa_index(paddr);
9612
9613 LOCK_PVH(pai);
9614
9615
9616 do {
9617 pp_attr_current = pp_attr_table[pai];
9618 wimg_bits_prev = VM_WIMG_DEFAULT;
9619 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9620 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
9621 }
9622
9623 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
9624
9625 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9626 * to avoid losing simultaneous updates to other bits like refmod. */
9627 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
9628
9629 wimg_bits_new = VM_WIMG_DEFAULT;
9630 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
9631 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
9632 }
9633
9634 if (wimg_bits_new != wimg_bits_prev) {
9635 pmap_update_cache_attributes_locked(pn, cacheattr);
9636 }
9637
9638 UNLOCK_PVH(pai);
9639
9640 pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
9641 }
9642
9643 MARK_AS_PMAP_TEXT static void
9644 pmap_set_cache_attributes_internal(
9645 ppnum_t pn,
9646 unsigned int cacheattr)
9647 {
9648 pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
9649 }
9650
9651 void
9652 pmap_set_cache_attributes(
9653 ppnum_t pn,
9654 unsigned int cacheattr)
9655 {
9656 pmap_set_cache_attributes_internal(pn, cacheattr);
9657 }
9658
9659 void
9660 pmap_update_cache_attributes_locked(
9661 ppnum_t ppnum,
9662 unsigned attributes)
9663 {
9664 pmap_paddr_t phys = ptoa(ppnum);
9665 pv_entry_t *pve_p;
9666 pt_entry_t *pte_p;
9667 pv_entry_t **pv_h;
9668 pt_entry_t tmplate;
9669 unsigned int pai;
9670 boolean_t tlb_flush_needed = FALSE;
9671
9672 #if __ARM_PTE_PHYSMAP__
9673 vm_offset_t kva = phystokv(phys);
9674 pte_p = pmap_pte(kernel_pmap, kva);
9675
9676 tmplate = *pte_p;
9677 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
9678 tmplate |= wimg_to_pte(attributes);
9679 #if (__ARM_VMSA__ > 7)
9680 if (tmplate & ARM_PTE_HINT_MASK) {
9681 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
9682 __FUNCTION__, pte_p, (void *)kva, tmplate);
9683 }
9684 #endif
9685 WRITE_PTE_STRONG(pte_p, tmplate);
9686 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
9687 tlb_flush_needed = TRUE;
9688 #endif
9689
9690 pai = (unsigned int)pa_index(phys);
9691
9692 pv_h = pai_to_pvh(pai);
9693
9694 pte_p = PT_ENTRY_NULL;
9695 pve_p = PV_ENTRY_NULL;
9696 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
9697 pte_p = pvh_ptep(pv_h);
9698 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
9699 pve_p = pvh_list(pv_h);
9700 pte_p = PT_ENTRY_NULL;
9701 }
9702
9703 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
9704 vm_map_address_t va;
9705 pmap_t pmap;
9706
9707 if (pve_p != PV_ENTRY_NULL) {
9708 pte_p = pve_get_ptep(pve_p);
9709 }
9710 #ifdef PVH_FLAG_IOMMU
9711 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
9712 goto cache_skip_pve;
9713 }
9714 #endif
9715 pmap = ptep_get_pmap(pte_p);
9716 va = ptep_get_va(pte_p);
9717
9718 tmplate = *pte_p;
9719 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
9720 tmplate |= wimg_to_pte(attributes);
9721
9722 WRITE_PTE_STRONG(pte_p, tmplate);
9723 flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
9724 tlb_flush_needed = TRUE;
9725
9726 #ifdef PVH_FLAG_IOMMU
9727 cache_skip_pve:
9728 #endif
9729 pte_p = PT_ENTRY_NULL;
9730 if (pve_p != PV_ENTRY_NULL) {
9731 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
9732 }
9733 }
9734 if (tlb_flush_needed) {
9735 sync_tlb_flush();
9736 }
9737 }
9738
9739 #if (__ARM_VMSA__ == 7)
9740 vm_map_address_t
9741 pmap_create_sharedpage(
9742 void)
9743 {
9744 pmap_paddr_t pa;
9745 kern_return_t kr;
9746
9747 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
9748 memset((char *) phystokv(pa), 0, PAGE_SIZE);
9749
9750 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9751 assert(kr == KERN_SUCCESS);
9752
9753 return (vm_map_address_t)phystokv(pa);
9754 }
9755 #else
9756 static void
9757 pmap_update_tt3e(
9758 pmap_t pmap,
9759 vm_address_t address,
9760 tt_entry_t template)
9761 {
9762 tt_entry_t *ptep, pte;
9763
9764 ptep = pmap_tt3e(pmap, address);
9765 if (ptep == NULL) {
9766 panic("%s: no ptep?\n", __FUNCTION__);
9767 }
9768
9769 pte = *ptep;
9770 pte = tte_to_pa(pte) | template;
9771 WRITE_PTE_STRONG(ptep, pte);
9772 }
9773
9774 /* Note absence of non-global bit */
9775 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
9776 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
9777 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
9778 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
9779
9780 vm_map_address_t
9781 pmap_create_sharedpage(
9782 void
9783 )
9784 {
9785 kern_return_t kr;
9786 pmap_paddr_t pa = 0;
9787
9788
9789 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
9790
9791 memset((char *) phystokv(pa), 0, PAGE_SIZE);
9792
9793 #ifdef CONFIG_XNUPOST
9794 /*
9795 * The kernel pmap maintains a user accessible mapping of the commpage
9796 * to test PAN.
9797 */
9798 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9799 assert(kr == KERN_SUCCESS);
9800
9801 /*
9802 * This mapping should not be global (as we only expect to reference it
9803 * during testing).
9804 */
9805 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
9806
9807 #if KASAN
9808 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
9809 #endif
9810 #endif /* CONFIG_XNUPOST */
9811
9812 /*
9813 * In order to avoid burning extra pages on mapping the shared page, we
9814 * create a dedicated pmap for the shared page. We forcibly nest the
9815 * translation tables from this pmap into other pmaps. The level we
9816 * will nest at depends on the MMU configuration (page size, TTBR range,
9817 * etc).
9818 *
9819 * Note that this is NOT "the nested pmap" (which is used to nest the
9820 * shared cache).
9821 *
9822 * Note that we update parameters of the entry for our unique needs (NG
9823 * entry, etc.).
9824 */
9825 sharedpage_pmap = pmap_create(NULL, 0x0, FALSE);
9826 assert(sharedpage_pmap != NULL);
9827
9828 /* The user 64-bit mapping... */
9829 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9830 assert(kr == KERN_SUCCESS);
9831 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
9832
9833 /* ...and the user 32-bit mapping. */
9834 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9835 assert(kr == KERN_SUCCESS);
9836 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
9837
9838 /* For manipulation in kernel, go straight to physical page */
9839 return (vm_map_address_t)phystokv(pa);
9840 }
9841
9842 /*
9843 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
9844 * with user controlled TTEs.
9845 */
9846 #if (ARM_PGSHIFT == 14) || __ARM64_TWO_LEVEL_PMAP__
9847 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
9848 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
9849 #elif (ARM_PGSHIFT == 12)
9850 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
9851 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
9852 #else
9853 #error Nested shared page mapping is unsupported on this config
9854 #endif
9855
9856 MARK_AS_PMAP_TEXT static kern_return_t
9857 pmap_insert_sharedpage_internal(
9858 pmap_t pmap)
9859 {
9860 kern_return_t kr = KERN_SUCCESS;
9861 vm_offset_t sharedpage_vaddr;
9862 pt_entry_t *ttep, *src_ttep;
9863 int options = 0;
9864
9865 VALIDATE_PMAP(pmap);
9866
9867 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
9868 #error We assume a single page.
9869 #endif
9870
9871 if (pmap_is_64bit(pmap)) {
9872 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
9873 } else {
9874 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
9875 }
9876
9877 PMAP_LOCK(pmap);
9878
9879 /*
9880 * For 4KB pages, we can force the commpage to nest at the level one
9881 * page table, as each entry is 1GB (i.e, there will be no overlap
9882 * with regular userspace mappings). For 16KB pages, each level one
9883 * entry is 64GB, so we must go to the second level entry (32MB) in
9884 * order to nest.
9885 */
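/*
 * Worked numbers behind the comment above (standard ARMv8 VMSA translation
 * geometry): with a 4KB granule an L1 entry spans 2^30 bytes = 1GB, well
 * clear of regular user mappings; with a 16KB granule an L1 entry spans
 * 2^36 bytes = 64GB (too coarse to nest without overlap), while an L2
 * entry spans 2^25 bytes = 32MB, so nesting happens one level down.
 */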
9886 #if (ARM_PGSHIFT == 12)
9887 #if __ARM64_TWO_LEVEL_PMAP__
9888 #error A two level page table with a page shift of 12 is not currently supported
9889 #endif
9890 (void)options;
9891
9892 /* Just slam in the L1 entry. */
9893 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
9894
9895 if (*ttep != ARM_PTE_EMPTY) {
9896 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
9897 }
9898
9899 src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
9900 #elif (ARM_PGSHIFT == 14)
9901 #if !__ARM64_TWO_LEVEL_PMAP__
9902 /* Allocate for the L2 entry if necessary, and slam it into place. */
9903 /*
9904 * As long as we are use a three level page table, the first level
9905 * should always exist, so we don't need to check for it.
9906 */
9907 while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
9908 PMAP_UNLOCK(pmap);
9909
9910 kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);
9911
9912 if (kr != KERN_SUCCESS) {
9913 panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
9914 }
9917
9918 PMAP_LOCK(pmap);
9919 }
9920 #endif
9921
9922 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
9923
9924 if (*ttep != ARM_PTE_EMPTY) {
9925 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
9926 }
9927
9928 src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
9929 #endif
9930
9931 *ttep = *src_ttep;
9932 FLUSH_PTE_STRONG(ttep);
9933
9934 /* TODO: Should we flush in the 64-bit case? */
9935 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
9936
9937 #if (ARM_PGSHIFT == 12) && !__ARM64_TWO_LEVEL_PMAP__
9938 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->asid));
9939 #elif (ARM_PGSHIFT == 14)
9940 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
9941 #endif
9942 sync_tlb_flush();
9943
9944 PMAP_UNLOCK(pmap);
9945
9946 return kr;
9947 }
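/*
 * Net effect, sketched for the 16KB-granule case: after the copy, the task
 * pmap's L2 slot for the commpage VA points at the same L3 page table owned
 * by sharedpage_pmap, so every task shares one leaf table (and one physical
 * commpage) instead of paying a page table page per process.
 *
 *	// conceptually:
 *	//   *pmap_tt2e(task_pmap, _COMM_PAGE64_BASE_ADDRESS)
 *	//     == *pmap_tt2e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS)
 */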
9948
9949 static void
9950 pmap_unmap_sharedpage(
9951 pmap_t pmap)
9952 {
9953 pt_entry_t *ttep;
9954 vm_offset_t sharedpage_vaddr;
9955
9956 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
9957 #error We assume a single page.
9958 #endif
9959
9960 if (pmap_is_64bit(pmap)) {
9961 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
9962 } else {
9963 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
9964 }
9965
9966 #if (ARM_PGSHIFT == 12)
9967 #if __ARM64_TWO_LEVEL_PMAP__
9968 #error A two level page table with a page shift of 12 is not currently supported
9969 #endif
9970 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
9971
9972 if (ttep == NULL) {
9973 return;
9974 }
9975
9976 /* It had better be mapped to the shared page */
9977 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
9978 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
9979 }
9980 #elif (ARM_PGSHIFT == 14)
9981 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
9982
9983 if (ttep == NULL) {
9984 return;
9985 }
9986
9987 /* It had better be mapped to the shared page */
9988 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
9989 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
9990 }
9991 #endif
9992
9993 *ttep = ARM_TTE_EMPTY;
9994 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
9995
9996 #if (ARM_PGSHIFT == 12)
9997 #if __ARM64_TWO_LEVEL_PMAP__
9998 #error A two level page table with a page shift of 12 is not currently supported
9999 #endif
10000 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->asid));
10001 #elif (ARM_PGSHIFT == 14)
10002 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
10003 #endif
10004 sync_tlb_flush();
10005 }
10006
10007 void
10008 pmap_insert_sharedpage(
10009 pmap_t pmap)
10010 {
10011 pmap_insert_sharedpage_internal(pmap);
10012 }
10013
10014 static boolean_t
10015 pmap_is_64bit(
10016 pmap_t pmap)
10017 {
10018 return pmap->is_64bit;
10019 }
10020
10021 #endif
10022
10023 /* ARMTODO -- an implementation that accounts for
10024 * holes in the physical map, if any.
10025 */
10026 boolean_t
10027 pmap_valid_page(
10028 ppnum_t pn)
10029 {
10030 return pa_valid(ptoa(pn));
10031 }
10032
10033 MARK_AS_PMAP_TEXT static boolean_t
10034 pmap_is_empty_internal(
10035 pmap_t pmap,
10036 vm_map_offset_t va_start,
10037 vm_map_offset_t va_end)
10038 {
10039 vm_map_offset_t block_start, block_end;
10040 tt_entry_t *tte_p;
10041
10042 if (pmap == NULL) {
10043 return TRUE;
10044 }
10045
10046 VALIDATE_PMAP(pmap);
10047
10048 if ((pmap != kernel_pmap) && (not_in_kdp)) {
10049 PMAP_LOCK(pmap);
10050 }
10051
10052 #if (__ARM_VMSA__ == 7)
10053 if (tte_index(pmap, va_end) >= pmap->tte_index_max) {
10054 if ((pmap != kernel_pmap) && (not_in_kdp)) {
10055 PMAP_UNLOCK(pmap);
10056 }
10057 return TRUE;
10058 }
10059
10060 block_start = va_start;
10061 tte_p = pmap_tte(pmap, block_start);
10062 while (block_start < va_end) {
10063 block_end = (block_start + ARM_TT_L1_SIZE) & ~(ARM_TT_L1_OFFMASK);
10064 if (block_end > va_end) {
10065 block_end = va_end;
10066 }
10067
10068 if ((*tte_p & ARM_TTE_TYPE_MASK) != 0) {
10069 vm_map_offset_t offset;
10070 ppnum_t phys_page = 0;
10071
10072 for (offset = block_start;
10073 offset < block_end;
10074 offset += ARM_PGBYTES) {
10075 // This does a pmap_find_phys() lookup but assumes lock is held
10076 phys_page = pmap_vtophys(pmap, offset);
10077 if (phys_page) {
10078 if ((pmap != kernel_pmap) && (not_in_kdp)) {
10079 PMAP_UNLOCK(pmap);
10080 }
10081 return FALSE;
10082 }
10083 }
10084 }
10085
10086 block_start = block_end;
10087 tte_p++;
10088 }
10089 #else
10090 block_start = va_start;
10091
10092 while (block_start < va_end) {
10093 pt_entry_t *bpte_p, *epte_p;
10094 pt_entry_t *pte_p;
10095
10096 block_end = (block_start + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK;
10097 if (block_end > va_end) {
10098 block_end = va_end;
10099 }
10100
10101 tte_p = pmap_tt2e(pmap, block_start);
10102 if ((tte_p != PT_ENTRY_NULL)
10103 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
10104 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10105 bpte_p = &pte_p[tt3_index(pmap, block_start)];
10106 epte_p = bpte_p + (((block_end - block_start) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
10107
10108 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
10109 if (*pte_p != ARM_PTE_EMPTY) {
10110 if ((pmap != kernel_pmap) && (not_in_kdp)) {
10111 PMAP_UNLOCK(pmap);
10112 }
10113 return FALSE;
10114 }
10115 }
10116 }
10117 block_start = block_end;
10118 }
10119 #endif
10120
10121 if ((pmap != kernel_pmap) && (not_in_kdp)) {
10122 PMAP_UNLOCK(pmap);
10123 }
10124
10125 return TRUE;
10126 }
10127
10128 boolean_t
10129 pmap_is_empty(
10130 pmap_t pmap,
10131 vm_map_offset_t va_start,
10132 vm_map_offset_t va_end)
10133 {
10134 return pmap_is_empty_internal(pmap, va_start, va_end);
10135 }
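/*
 * Hedged usage sketch (the caller shown is hypothetical): the VM layer can
 * use this to verify that a VA range has no mappings left before recycling
 * it.
 *
 *	assert(pmap_is_empty(map->pmap, entry->vme_start, entry->vme_end));
 */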
10136
10137 vm_map_offset_t
10138 pmap_max_offset(
10139 boolean_t is64,
10140 unsigned int option)
10141 {
10142 return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
10143 }
10144
10145 vm_map_offset_t
10146 pmap_max_64bit_offset(
10147 __unused unsigned int option)
10148 {
10149 vm_map_offset_t max_offset_ret = 0;
10150
10151 #if defined(__arm64__)
10152 const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
10153 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
10154 max_offset_ret = arm64_pmap_max_offset_default;
10155 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
10156 max_offset_ret = min_max_offset;
10157 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
10158 max_offset_ret = MACH_VM_MAX_ADDRESS;
10159 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
10160 if (arm64_pmap_max_offset_default) {
10161 max_offset_ret = arm64_pmap_max_offset_default;
10162 } else if (max_mem > 0xC0000000) {
10163 max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
10164 } else if (max_mem > 0x40000000) {
10165 max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
10166 } else {
10167 max_offset_ret = min_max_offset;
10168 }
10169 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
10170 if (arm64_pmap_max_offset_default) {
10171 // Allow the boot-arg to override jumbo size
10172 max_offset_ret = arm64_pmap_max_offset_default;
10173 } else {
10174 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
10175 }
10176 } else {
10177 panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
10178 }
10179
10180 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
10181 assert(max_offset_ret >= min_max_offset);
10182 #else
10183 panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
10184 #endif
10185
10186 return max_offset_ret;
10187 }
10188
10189 vm_map_offset_t
10190 pmap_max_32bit_offset(
10191 unsigned int option)
10192 {
10193 vm_map_offset_t max_offset_ret = 0;
10194
10195 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
10196 max_offset_ret = arm_pmap_max_offset_default;
10197 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
10198 max_offset_ret = 0x66000000;
10199 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
10200 max_offset_ret = VM_MAX_ADDRESS;
10201 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
10202 if (arm_pmap_max_offset_default) {
10203 max_offset_ret = arm_pmap_max_offset_default;
10204 } else if (max_mem > 0x20000000) {
10205 max_offset_ret = 0x80000000;
10206 } else {
10207 max_offset_ret = 0x66000000;
10208 }
10209 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
10210 max_offset_ret = 0x80000000;
10211 } else {
10212 panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
10213 }
10214
10215 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
10216 return max_offset_ret;
10217 }
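/*
 * Example of how the option plumbing above composes (the constants are the
 * ones encoded in pmap_max_64bit_offset; the call site is illustrative):
 *
 *	// 64-bit task on a device with more than 3GB of memory, no boot-arg:
 *	vm_map_offset_t max_va = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
 *	// == SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64
 *	//    + 0x20000000 + 0x138000000
 */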
10218
10219 #if CONFIG_DTRACE
10220 /*
10221 * Constrain DTrace copyin/copyout actions
10222 */
10223 extern kern_return_t dtrace_copyio_preflight(addr64_t);
10224 extern kern_return_t dtrace_copyio_postflight(addr64_t);
10225
10226 kern_return_t
10227 dtrace_copyio_preflight(
10228 __unused addr64_t va)
10229 {
10230 if (current_map() == kernel_map) {
10231 return KERN_FAILURE;
10232 } else {
10233 return KERN_SUCCESS;
10234 }
10235 }
10236
10237 kern_return_t
10238 dtrace_copyio_postflight(
10239 __unused addr64_t va)
10240 {
10241 return KERN_SUCCESS;
10242 }
10243 #endif /* CONFIG_DTRACE */
10244
10245
10246 void
10247 pmap_flush_context_init(__unused pmap_flush_context *pfc)
10248 {
10249 }
10250
10251
10252 void
10253 pmap_flush(
10254 __unused pmap_flush_context *cpus_to_flush)
10255 {
10256 /* not implemented yet */
10257 return;
10258 }
10259
10260
10261 static void __unused
10262 pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
10263 {
10264 }
10265
10266 static void __unused
10267 pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
10268 {
10269 }
10270
10271
10272
10273 #define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
10274
10275 MARK_AS_PMAP_TEXT static mach_vm_size_t
10276 pmap_query_resident_internal(
10277 pmap_t pmap,
10278 vm_map_address_t start,
10279 vm_map_address_t end,
10280 mach_vm_size_t *compressed_bytes_p)
10281 {
10282 mach_vm_size_t resident_bytes = 0;
10283 mach_vm_size_t compressed_bytes = 0;
10284
10285 pt_entry_t *bpte, *epte;
10286 pt_entry_t *pte_p;
10287 tt_entry_t *tte_p;
10288
10289 if (pmap == NULL) {
10290 return PMAP_RESIDENT_INVALID;
10291 }
10292
10293 VALIDATE_PMAP(pmap);
10294
10295 /* Ensure that this request is valid, and addresses exactly one TTE. */
10296 if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
10297 panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
10298 }
10299
10300 if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
10301 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
10302 }
10303
10304 PMAP_LOCK(pmap);
10305 tte_p = pmap_tte(pmap, start);
10306 if (tte_p == (tt_entry_t *) NULL) {
10307 PMAP_UNLOCK(pmap);
10308 return PMAP_RESIDENT_INVALID;
10309 }
10310 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
10311 #if (__ARM_VMSA__ == 7)
10312 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10313 bpte = &pte_p[ptenum(start)];
10314 epte = bpte + atop(end - start);
10315 #else
10316 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10317 bpte = &pte_p[tt3_index(pmap, start)];
10318 epte = bpte + ((end - start) >> ARM_TT_L3_SHIFT);
10319 #endif
10320
10321 for (; bpte < epte; bpte++) {
10322 if (ARM_PTE_IS_COMPRESSED(*bpte)) {
10323 compressed_bytes += ARM_PGBYTES;
10324 } else if (pa_valid(pte_to_pa(*bpte))) {
10325 resident_bytes += ARM_PGBYTES;
10326 }
10327 }
10328 }
10329 PMAP_UNLOCK(pmap);
10330
10331 if (compressed_bytes_p) {
10332 pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
10333 *compressed_bytes_p += compressed_bytes;
10334 pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
10335 }
10336
10337 return resident_bytes;
10338 }
10339
10340 mach_vm_size_t
10341 pmap_query_resident(
10342 pmap_t pmap,
10343 vm_map_address_t start,
10344 vm_map_address_t end,
10345 mach_vm_size_t *compressed_bytes_p)
10346 {
10347 mach_vm_size_t total_resident_bytes;
10348 mach_vm_size_t compressed_bytes;
10349 vm_map_address_t va;
10350
10351
10352 if (pmap == PMAP_NULL) {
10353 if (compressed_bytes_p) {
10354 *compressed_bytes_p = 0;
10355 }
10356 return 0;
10357 }
10358
10359 total_resident_bytes = 0;
10360 compressed_bytes = 0;
10361
10362 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
10363 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
10364 VM_KERNEL_ADDRHIDE(end));
10365
10366 va = start;
10367 while (va < end) {
10368 vm_map_address_t l;
10369 mach_vm_size_t resident_bytes;
10370
10371 l = ((va + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);
10372
10373 if (l > end) {
10374 l = end;
10375 }
10376 resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
10377 if (resident_bytes == PMAP_RESIDENT_INVALID) {
10378 break;
10379 }
10380
10381 total_resident_bytes += resident_bytes;
10382
10383 va = l;
10384 }
10385
10386 if (compressed_bytes_p) {
10387 *compressed_bytes_p = compressed_bytes;
10388 }
10389
10390 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
10391 total_resident_bytes);
10392
10393 return total_resident_bytes;
10394 }
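/*
 * Hedged usage sketch (the caller shown is illustrative): the VM map code
 * can total the resident and compressed footprint of a mapped range like
 * this; the loop above splits the range into twig-sized chunks internally.
 *
 *	mach_vm_size_t compressed = 0;
 *	mach_vm_size_t resident = pmap_query_resident(map->pmap,
 *	    entry->vme_start, entry->vme_end, &compressed);
 */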
10395
10396 #if MACH_ASSERT
10397 static void
10398 pmap_check_ledgers(
10399 pmap_t pmap)
10400 {
10401 ledger_amount_t bal;
10402 int pid;
10403 char *procname;
10404 boolean_t do_panic;
10405
10406 if (pmap->pmap_pid == 0) {
10407 /*
10408 * This pmap was not or is no longer fully associated
10409 * with a task (e.g. the old pmap after a fork()/exec() or
10410 * spawn()). Its "ledger" still points at a task that is
10411 * now using a different (and active) address space, so
10412 * we can't check that all the pmap ledgers are balanced here.
10413 *
10414 * If the "pid" is set, that means that we went through
10415 * pmap_set_process() in task_terminate_internal(), so
10416 * this task's ledger should not have been re-used and
10417 * all the pmap ledgers should be back to 0.
10418 */
10419 return;
10420 }
10421
10422 do_panic = FALSE;
10423 pid = pmap->pmap_pid;
10424 procname = pmap->pmap_procname;
10425
10426 pmap_ledgers_drift.num_pmaps_checked++;
10427
10428 #define LEDGER_CHECK_BALANCE(__LEDGER) \
10429 MACRO_BEGIN \
10430 int panic_on_negative = TRUE; \
10431 ledger_get_balance(pmap->ledger, \
10432 task_ledgers.__LEDGER, \
10433 &bal); \
10434 ledger_get_panic_on_negative(pmap->ledger, \
10435 task_ledgers.__LEDGER, \
10436 &panic_on_negative); \
10437 if (bal != 0) { \
10438 if (panic_on_negative || \
10439 (pmap_ledgers_panic && \
10440 pmap_ledgers_panic_leeway > 0 && \
10441 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
10442 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
10443 do_panic = TRUE; \
10444 } \
10445 printf("LEDGER BALANCE proc %d (%s) " \
10446 "\"%s\" = %lld\n", \
10447 pid, procname, #__LEDGER, bal); \
10448 if (bal > 0) { \
10449 pmap_ledgers_drift.__LEDGER##_over++; \
10450 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
10451 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
10452 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
10453 } \
10454 } else if (bal < 0) { \
10455 pmap_ledgers_drift.__LEDGER##_under++; \
10456 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
10457 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
10458 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
10459 } \
10460 } \
10461 } \
10462 MACRO_END
10463
10464 LEDGER_CHECK_BALANCE(phys_footprint);
10465 LEDGER_CHECK_BALANCE(internal);
10466 LEDGER_CHECK_BALANCE(internal_compressed);
10467 LEDGER_CHECK_BALANCE(iokit_mapped);
10468 LEDGER_CHECK_BALANCE(alternate_accounting);
10469 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
10470 LEDGER_CHECK_BALANCE(page_table);
10471 LEDGER_CHECK_BALANCE(purgeable_volatile);
10472 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
10473 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
10474 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
10475 LEDGER_CHECK_BALANCE(network_volatile);
10476 LEDGER_CHECK_BALANCE(network_nonvolatile);
10477 LEDGER_CHECK_BALANCE(network_volatile_compressed);
10478 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
10479
10480 if (do_panic) {
10481 if (pmap_ledgers_panic) {
10482 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
10483 pmap, pid, procname);
10484 } else {
10485 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
10486 pmap, pid, procname);
10487 }
10488 }
10489
10490 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
10491 #if 00
10492 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
10493 #endif
10494 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
10495 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
10496 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
10497 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
10498 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
10499 }
10500 #endif /* MACH_ASSERT */
10501
10502 void
10503 pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
10504 {
10505 }
10506
10507
10508 #if CONFIG_PGTRACE
10509 #define PROF_START uint64_t t, nanot;\
10510 t = mach_absolute_time();
10511
10512 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
10513 kprintf("%s: took %llu ns\n", __func__, nanot);
10514
10515 #define PMAP_PGTRACE_LOCK(p) \
10516 do { \
10517 *(p) = ml_set_interrupts_enabled(false); \
10518 if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
10519 ml_set_interrupts_enabled(*(p)); \
10520 } while (true)
10521
10522 #define PMAP_PGTRACE_UNLOCK(p) \
10523 do { \
10524 simple_unlock(&(pmap_pgtrace.lock)); \
10525 ml_set_interrupts_enabled(*(p)); \
10526 } while (0)
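/*
 * The lock/unlock pair above follows the usual interrupt-safe try-lock
 * pattern: interrupts are disabled before the try-lock so that a trace
 * fault taken on this CPU (pmap_pgtrace_fault runs under this lock) cannot
 * deadlock against a holder on the same CPU; on failure the previous
 * interrupt state is restored and the acquisition is retried.
 */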
10527
10528 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
10529 do { \
10530 *(pte_p) = (pte_entry); \
10531 FLUSH_PTE(pte_p); \
10532 } while (0)
10533
10534 #define PGTRACE_MAX_MAP 16 // maximum supported va to same pa
10535
10536 typedef enum {
10537 UNDEFINED,
10538 PA_UNDEFINED,
10539 VA_UNDEFINED,
10540 DEFINED
10541 } pmap_pgtrace_page_state_t;
10542
10543 typedef struct {
10544 queue_chain_t chain;
10545
10546 /*
10547 * pa - physical address being traced
10548 * maps - list of va maps to the pa above
10549 * map_pool - pool of unused map entries
10550 * map_waste - retired maps waiting to be recycled
10551 * state - tracking state of this page (see pmap_pgtrace_page_state_t)
10552 */
10553 pmap_paddr_t pa;
10554 queue_head_t maps;
10555 queue_head_t map_pool;
10556 queue_head_t map_waste;
10557 pmap_pgtrace_page_state_t state;
10558 } pmap_pgtrace_page_t;
10559
10560 static struct {
10561 /*
10562 * pages - list of tracing page info
10563 */
10564 queue_head_t pages;
10565 decl_simple_lock_data(, lock);
10566 } pmap_pgtrace = {};
10567
10568 static void
10569 pmap_pgtrace_init(void)
10570 {
10571 queue_init(&(pmap_pgtrace.pages));
10572 simple_lock_init(&(pmap_pgtrace.lock), 0);
10573
10574 boolean_t enabled;
10575
10576 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
10577 pgtrace_enabled = enabled;
10578 }
10579 }
10580
10581 // find a page with given pa - pmap_pgtrace should be locked
10582 inline static pmap_pgtrace_page_t *
10583 pmap_pgtrace_find_page(pmap_paddr_t pa)
10584 {
10585 queue_head_t *q = &(pmap_pgtrace.pages);
10586 pmap_pgtrace_page_t *p;
10587
10588 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
10589 if (p->state == UNDEFINED) {
10590 continue;
10591 }
10592 if (p->state == PA_UNDEFINED) {
10593 continue;
10594 }
10595 if (p->pa == pa) {
10596 return p;
10597 }
10598 }
10599
10600 return NULL;
10601 }
10602
10603 // enter clone of given pmap, va page and range - pmap should be locked
10604 static bool
10605 pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
10606 {
10607 bool ints;
10608 queue_head_t *q = &(pmap_pgtrace.pages);
10609 pmap_paddr_t pa_page;
10610 pt_entry_t *ptep, *cptep;
10611 pmap_pgtrace_page_t *p;
10612 bool found = false;
10613
10614 PMAP_ASSERT_LOCKED(pmap);
10615 assert(va_page == arm_trunc_page(va_page));
10616
10617 PMAP_PGTRACE_LOCK(&ints);
10618
10619 ptep = pmap_pte(pmap, va_page);
10620
10621 // target pte should exist
10622 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
10623 PMAP_PGTRACE_UNLOCK(&ints);
10624 return false;
10625 }
10626
10627 queue_head_t *mapq;
10628 queue_head_t *mappool;
10629 pmap_pgtrace_map_t *map = NULL;
10630
10631 pa_page = pte_to_pa(*ptep);
10632
10633 // find if we have a page info defined for this
10634 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
10635 mapq = &(p->maps);
10636 mappool = &(p->map_pool);
10637
10638 switch (p->state) {
10639 case PA_UNDEFINED:
10640 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10641 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
10642 p->pa = pa_page;
10643 map->range.start = start;
10644 map->range.end = end;
10645 found = true;
10646 break;
10647 }
10648 }
10649 break;
10650
10651 case VA_UNDEFINED:
10652 if (p->pa != pa_page) {
10653 break;
10654 }
10655 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10656 if (map->cloned == false) {
10657 map->pmap = pmap;
10658 map->ova = va_page;
10659 map->range.start = start;
10660 map->range.end = end;
10661 found = true;
10662 break;
10663 }
10664 }
10665 break;
10666
10667 case DEFINED:
10668 if (p->pa != pa_page) {
10669 break;
10670 }
10671 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10672 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
10673 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
10674 break;
10675 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
10676 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
10677 break;
10678 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
10679 // range should be already defined as well
10680 found = true;
10681 break;
10682 }
10683 }
10684 break;
10685
10686 default:
10687 panic("invalid state p->state=%x\n", p->state);
10688 }
10689
10690 if (found == true) {
10691 break;
10692 }
10693 }
10694
10695 // do not clone if no page info found
10696 if (found == false) {
10697 PMAP_PGTRACE_UNLOCK(&ints);
10698 return false;
10699 }
10700
10701 // copy pre, target and post ptes to clone ptes
10702 for (int i = 0; i < 3; i++) {
10703 ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
10704 cptep = pmap_pte(kernel_pmap, map->cva[i]);
10705 assert(cptep != NULL);
10706 if (ptep == NULL) {
10707 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
10708 } else {
10709 PGTRACE_WRITE_PTE(cptep, *ptep);
10710 }
10711 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES);
10712 }
10713
10714 // get ptes for original and clone
10715 ptep = pmap_pte(pmap, va_page);
10716 cptep = pmap_pte(kernel_pmap, map->cva[1]);
10717
10718 // invalidate original pte and mark it as a pgtrace page
10719 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
10720 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES);
10721
10722 map->cloned = true;
10723 p->state = DEFINED;
10724
10725 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
10726
10727 PMAP_PGTRACE_UNLOCK(&ints);
10728
10729 return true;
10730 }
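/*
 * Clone window layout, for reference (derived from the copy loop above):
 * each map carries three consecutive kernel VAs, cva[0]/cva[1]/cva[2],
 * mirroring the page before the traced VA, the traced page itself, and the
 * page after it.  Only the original mapping is made invalid (and tagged
 * ARM_PTE_PGTRACE); the decoder later replays the faulting instruction
 * against cva[1], presumably so that accesses spilling into the adjacent
 * pages still hit valid mappings.
 */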
10731
10732 // This function removes the trace bit and re-validates the pte if applicable. The pmap must be locked.
10733 static void
10734 pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
10735 {
10736 bool ints, found = false;
10737 pmap_pgtrace_page_t *p;
10738 pt_entry_t *ptep;
10739
10740 PMAP_PGTRACE_LOCK(&ints);
10741
10742 // we must have this page info
10743 p = pmap_pgtrace_find_page(pa);
10744 if (p == NULL) {
10745 goto unlock_exit;
10746 }
10747
10748 // find matching map
10749 queue_head_t *mapq = &(p->maps);
10750 queue_head_t *mappool = &(p->map_pool);
10751 pmap_pgtrace_map_t *map;
10752
10753 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10754 if (map->pmap == pmap && map->ova == va) {
10755 found = true;
10756 break;
10757 }
10758 }
10759
10760 if (!found) {
10761 goto unlock_exit;
10762 }
10763
10764 if (map->cloned == true) {
10765 // Restore the pte to its original state
10766 ptep = pmap_pte(pmap, map->ova);
10767 assert(ptep);
10768 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
10769 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES);
10770
10771 // revert clone pages
10772 for (int i = 0; i < 3; i++) {
10773 ptep = pmap_pte(kernel_pmap, map->cva[i]);
10774 assert(ptep != NULL);
10775 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
10776 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES);
10777 }
10778 }
10779
10780 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
10781 map->pmap = NULL;
10782 map->ova = (vm_map_offset_t)NULL;
10783 map->cloned = false;
10784 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
10785
10786 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
10787
10788 unlock_exit:
10789 PMAP_PGTRACE_UNLOCK(&ints);
10790 }
10791
10792 // remove all clones of given pa - pmap must be locked
10793 static void
10794 pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
10795 {
10796 bool ints;
10797 pmap_pgtrace_page_t *p;
10798 pt_entry_t *ptep;
10799
10800 PMAP_PGTRACE_LOCK(&ints);
10801
10802 // we must have this page info
10803 p = pmap_pgtrace_find_page(pa);
10804 if (p == NULL) {
10805 PMAP_PGTRACE_UNLOCK(&ints);
10806 return;
10807 }
10808
10809 queue_head_t *mapq = &(p->maps);
10810 queue_head_t *mappool = &(p->map_pool);
10811 queue_head_t *mapwaste = &(p->map_waste);
10812 pmap_pgtrace_map_t *map;
10813
10814 // move maps to waste
10815 while (!queue_empty(mapq)) {
10816 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
10817 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t*, chain);
10818 }
10819
10820 PMAP_PGTRACE_UNLOCK(&ints);
10821
10822 // sanitize maps in waste
10823 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
10824 if (map->cloned == true) {
10825 PMAP_LOCK(map->pmap);
10826
10827 // restore the original pte
10828 ptep = pmap_pte(map->pmap, map->ova);
10829 assert(ptep);
10830 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
10831 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES);
10832
10833 // revert clone ptes
10834 for (int i = 0; i < 3; i++) {
10835 ptep = pmap_pte(kernel_pmap, map->cva[i]);
10836 assert(ptep != NULL);
10837 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
10838 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES);
10839 }
10840
10841 PMAP_UNLOCK(map->pmap);
10842 }
10843
10844 map->pmap = NULL;
10845 map->ova = (vm_map_offset_t)NULL;
10846 map->cloned = false;
10847 }
10848
10849 PMAP_PGTRACE_LOCK(&ints);
10850
10851 // recycle maps back to map_pool
10852 while (!queue_empty(mapwaste)) {
10853 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
10854 queue_enter_first(mappool, map, pmap_pgtrace_map_t*, chain);
10855 }
10856
10857 PMAP_PGTRACE_UNLOCK(&ints);
10858 }
10859
10860 inline static void
10861 pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
10862 {
10863 uint64_t tsz;
10864 vm_map_offset_t end;
10865
10866 if (pmap == kernel_pmap) {
10867 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
10868 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
10869 *endp = VM_MAX_KERNEL_ADDRESS;
10870 } else {
10871 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
10872 if (tsz == 64) {
10873 end = 0;
10874 } else {
10875 end = ((uint64_t)1 << (64 - tsz)) - 1;
10876 }
10877
10878 *startp = 0;
10879 *endp = end;
10880 }
10881
10882 assert(*endp > *startp);
10883
10884 return;
10885 }
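/*
 * Worked example of the TCR math above: with T0SZ = 25 the user address
 * space is 2^(64 - 25) = 2^39 bytes, so the user search range becomes
 * [0, 0x7fffffffff]; with T1SZ = 25 the kernel range starts at
 * 0xffffff8000000000 (the top 2^39 bytes), clamped by
 * VM_MIN_KERNEL_ADDRESS / VM_MAX_KERNEL_ADDRESS.
 */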
10886
10887 // if pa is mapped in the given pmap, clone its mapping(s)
10888 static uint64_t
10889 pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
10890 {
10891 uint64_t ret = 0;
10892 vm_map_offset_t min, max;
10893 vm_map_offset_t cur_page, end_page;
10894 pt_entry_t *ptep;
10895 tt_entry_t *ttep;
10896 tt_entry_t tte;
10897
10898 pmap_pgtrace_get_search_space(pmap, &min, &max);
10899
10900 cur_page = arm_trunc_page(min);
10901 end_page = arm_trunc_page(max);
10902 while (cur_page <= end_page) {
10903 vm_map_offset_t add = 0;
10904
10905 PMAP_LOCK(pmap);
10906
10907 // skip address ranges we are not interested in
10908 if (pmap == kernel_pmap &&
10909 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
10910 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
10911 add = ARM_PGBYTES;
10912 goto unlock_continue;
10913 }
10914
10915 #if __ARM64_TWO_LEVEL_PMAP__
10916 // check whether we can skip l2
10917 ttep = pmap_tt2e(pmap, cur_page);
10918 assert(ttep);
10919 tte = *ttep;
10920 #else
10921 // check whether we can skip l1
10922 ttep = pmap_tt1e(pmap, cur_page);
10923 assert(ttep);
10924 tte = *ttep;
10925 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
10926 add = ARM_TT_L1_SIZE;
10927 goto unlock_continue;
10928 }
10929
10930 // how about l2
10931 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, cur_page)];
10932 #endif
10933 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
10934 add = ARM_TT_L2_SIZE;
10935 goto unlock_continue;
10936 }
10937
10938 // ptep finally
10939 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, cur_page)]);
10940 if (ptep == PT_ENTRY_NULL) {
10941 add = ARM_TT_L3_SIZE;
10942 goto unlock_continue;
10943 }
10944
10945 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
10946 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
10947 ret++;
10948 }
10949 }
10950
10951 add = ARM_PGBYTES;
10952
10953 unlock_continue:
10954 PMAP_UNLOCK(pmap);
10955
10956 // overflow
10957 if (cur_page + add < cur_page) {
10958 break;
10959 }
10960
10961 cur_page += add;
10962 }
10963
10964
10965 return ret;
10966 }
10967
10968 // search pv table and clone vas of given pa
10969 static uint64_t
10970 pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
10971 {
10972 uint64_t ret = 0;
10973 unsigned long pai;
10974 pv_entry_t **pvh;
10975 pt_entry_t *ptep;
10976 pmap_t pmap;
10977
10978 typedef struct {
10979 queue_chain_t chain;
10980 pmap_t pmap;
10981 vm_map_offset_t va;
10982 } pmap_va_t;
10983
10984 queue_head_t pmapvaq;
10985 pmap_va_t *pmapva;
10986
10987 queue_init(&pmapvaq);
10988
10989 pai = pa_index(pa);
10990 LOCK_PVH(pai);
10991 pvh = pai_to_pvh(pai);
10992
10993 // collect pmap/va pair from pvh
10994 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
10995 ptep = pvh_ptep(pvh);
10996 pmap = ptep_get_pmap(ptep);
10997
10998 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
10999 pmapva->pmap = pmap;
11000 pmapva->va = ptep_get_va(ptep);
11001
11002 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
11003 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
11004 pv_entry_t *pvep;
11005
11006 pvep = pvh_list(pvh);
11007 while (pvep) {
11008 ptep = pve_get_ptep(pvep);
11009 pmap = ptep_get_pmap(ptep);
11010
11011 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
11012 pmapva->pmap = pmap;
11013 pmapva->va = ptep_get_va(ptep);
11014
11015 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
11016
11017 pvep = PVE_NEXT_PTR(pve_next(pvep));
11018 }
11019 }
11020
11021 UNLOCK_PVH(pai);
11022
11023 // clone them while making sure mapping still exists
11024 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
11025 PMAP_LOCK(pmapva->pmap);
11026 ptep = pmap_pte(pmapva->pmap, pmapva->va);
11027 if (pte_to_pa(*ptep) == pa) {
11028 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
11029 ret++;
11030 }
11031 }
11032 PMAP_UNLOCK(pmapva->pmap);
11033
11034 kfree(pmapva, sizeof(pmap_va_t));
11035 }
11036
11037 return ret;
11038 }
11039
11040 // allocate a page info
11041 static pmap_pgtrace_page_t *
11042 pmap_pgtrace_alloc_page(void)
11043 {
11044 pmap_pgtrace_page_t *p;
11045 queue_head_t *mapq;
11046 queue_head_t *mappool;
11047 queue_head_t *mapwaste;
11048 pmap_pgtrace_map_t *map;
11049
11050 p = kalloc(sizeof(pmap_pgtrace_page_t));
11051 assert(p);
11052
11053 p->state = UNDEFINED;
11054
11055 mapq = &(p->maps);
11056 mappool = &(p->map_pool);
11057 mapwaste = &(p->map_waste);
11058 queue_init(mapq);
11059 queue_init(mappool);
11060 queue_init(mapwaste);
11061
11062 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
11063 vm_map_offset_t newcva;
11064 pt_entry_t *cptep;
11065 kern_return_t kr;
11066 vm_map_entry_t entry;
11067
11068 // get a clone va
11069 vm_object_reference(kernel_object);
11070 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
11071 if (kr != KERN_SUCCESS) {
11072 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
11073 }
11074 VME_OBJECT_SET(entry, kernel_object);
11075 VME_OFFSET_SET(entry, newcva);
11076 vm_map_unlock(kernel_map);
11077
11078 // fill default clone page info and add to pool
11079 map = kalloc(sizeof(pmap_pgtrace_map_t));
11080 for (int j = 0; j < 3; j++) {
11081 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
11082
11083 // pre-expand pmap while preemption enabled
11084 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
11085 if (kr != KERN_SUCCESS) {
11086 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
11087 }
11088
11089 cptep = pmap_pte(kernel_pmap, addr);
11090 assert(cptep != NULL);
11091
11092 map->cva[j] = addr;
11093 map->cva_spte[j] = *cptep;
11094 }
11095 map->range.start = map->range.end = 0;
11096 map->cloned = false;
11097 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
11098 }
11099
11100 return p;
11101 }
11102
11103 // free a page info
11104 static void
11105 pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
11106 {
11107 queue_head_t *mapq;
11108 queue_head_t *mappool;
11109 queue_head_t *mapwaste;
11110 pmap_pgtrace_map_t *map;
11111
11112 assert(p);
11113
11114 mapq = &(p->maps);
11115 mappool = &(p->map_pool);
11116 mapwaste = &(p->map_waste);
11117
11118 while (!queue_empty(mapq)) {
11119 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
11120 kfree(map, sizeof(pmap_pgtrace_map_t));
11121 }
11122
11123 while (!queue_empty(mappool)) {
11124 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
11125 kfree(map, sizeof(pmap_pgtrace_map_t));
11126 }
11127
11128 while (!queue_empty(mapwaste)) {
11129 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
11130 kfree(map, sizeof(pmap_pgtrace_map_t));
11131 }
11132
11133 kfree(p, sizeof(pmap_pgtrace_page_t));
11134 }
11135
11136 // construct page infos with the given address range
11137 int
11138 pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
11139 {
11140 int ret = 0;
11141 pt_entry_t *ptep;
11142 queue_head_t *q = &(pmap_pgtrace.pages);
11143 bool ints;
11144 vm_map_offset_t cur_page, end_page;
11145
11146 if (start > end) {
11147 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
11148 return -1;
11149 }
11150
11151 PROF_START
11152
11153 // add each page in given range
11154 cur_page = arm_trunc_page(start);
11155 end_page = arm_trunc_page(end);
11156 while (cur_page <= end_page) {
11157 pmap_paddr_t pa_page = 0;
11158 uint64_t num_cloned = 0;
11159 pmap_pgtrace_page_t *p = NULL, *newp;
11160 bool free_newp = true;
11161 pmap_pgtrace_page_state_t state;
11162
11163 // do all allocations outside of spinlocks
11164 newp = pmap_pgtrace_alloc_page();
11165
11166 // maintain lock ordering: pmap, then kernel_pmap, then the pgtrace lock
11167 if (pmap != NULL) {
11168 PMAP_LOCK(pmap);
11169 }
11170 if (pmap != kernel_pmap) {
11171 PMAP_LOCK(kernel_pmap);
11172 }
11173
11174 // addresses are physical if pmap is null
11175 if (pmap == NULL) {
11176 ptep = NULL;
11177 pa_page = cur_page;
11178 state = VA_UNDEFINED;
11179 } else {
11180 ptep = pmap_pte(pmap, cur_page);
11181 if (ptep != NULL) {
11182 pa_page = pte_to_pa(*ptep);
11183 state = DEFINED;
11184 } else {
11185 state = PA_UNDEFINED;
11186 }
11187 }
11188
11189 // search if we have a page info already
11190 PMAP_PGTRACE_LOCK(&ints);
11191 if (state != PA_UNDEFINED) {
11192 p = pmap_pgtrace_find_page(pa_page);
11193 }
11194
11195 // add pre-allocated page info if nothing found
11196 if (p == NULL) {
11197 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
11198 p = newp;
11199 free_newp = false;
11200 }
11201
11202 // now p points to the page info we want
11203 p->state = state;
11204
11205 queue_head_t *mapq = &(p->maps);
11206 queue_head_t *mappool = &(p->map_pool);
11207 pmap_pgtrace_map_t *map;
11208 vm_map_offset_t start_offset, end_offset;
11209
11210 // calculate trace offsets in the page
11211 if (cur_page > start) {
11212 start_offset = 0;
11213 } else {
11214 start_offset = start - cur_page;
11215 }
11216 if (cur_page == end_page) {
11217 end_offset = end - end_page;
11218 } else {
11219 end_offset = ARM_PGBYTES - 1;
11220 }
11221
11222 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
11223
11224 // fill map info
11225 assert(!queue_empty(mappool));
11226 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
11227 if (p->state == PA_UNDEFINED) {
11228 map->pmap = pmap;
11229 map->ova = cur_page;
11230 map->range.start = start_offset;
11231 map->range.end = end_offset;
11232 } else if (p->state == VA_UNDEFINED) {
11233 p->pa = pa_page;
11234 map->range.start = start_offset;
11235 map->range.end = end_offset;
11236 } else if (p->state == DEFINED) {
11237 p->pa = pa_page;
11238 map->pmap = pmap;
11239 map->ova = cur_page;
11240 map->range.start = start_offset;
11241 map->range.end = end_offset;
11242 } else {
11243 panic("invalid p->state=%d\n", p->state);
11244 }
11245
11246 // not cloned yet
11247 map->cloned = false;
11248 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
11249
11250 // unlock locks
11251 PMAP_PGTRACE_UNLOCK(&ints);
11252 if (pmap != kernel_pmap) {
11253 PMAP_UNLOCK(kernel_pmap);
11254 }
11255 if (pmap != NULL) {
11256 PMAP_UNLOCK(pmap);
11257 }
11258
11259 // now clone it
11260 if (pa_valid(pa_page)) {
11261 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
11262 }
11263 if (pmap == NULL) {
11264 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
11265 } else {
11266 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
11267 }
11268
11269 // free pre-allocations if we didn't add it to the q
11270 if (free_newp) {
11271 pmap_pgtrace_free_page(newp);
11272 }
11273
11274 if (num_cloned == 0) {
11275 kprintf("%s: no mapping found for pa_page=%llx; it will be added when the page is entered\n", __func__, pa_page);
11276 }
11277
11278 ret += num_cloned;
11279
11280 // overflow
11281 if (cur_page + ARM_PGBYTES < cur_page) {
11282 break;
11283 } else {
11284 cur_page += ARM_PGBYTES;
11285 }
11286 }
11287
11288 PROF_END
11289
11290 return ret;
11291 }
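/*
 * Hedged end-to-end sketch of the tracing flow (pmap and va stand for the
 * address space and page of interest; names are illustrative):
 *
 *	pmap_pgtrace_add_page(pmap, va, va + PAGE_SIZE);
 *	// every load/store to [va, va + PAGE_SIZE) now takes a fault that is
 *	// routed to pmap_pgtrace_fault(), which replays the access on the
 *	// clone mapping and appends a log record
 *	pmap_pgtrace_delete_page(pmap, va, va + PAGE_SIZE);
 */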
11292
11293 // delete page infos for given address range
11294 int
11295 pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
11296 {
11297 int ret = 0;
11298 bool ints;
11299 queue_head_t *q = &(pmap_pgtrace.pages);
11300 pmap_pgtrace_page_t *p;
11301 vm_map_offset_t cur_page, end_page;
11302
11303 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
11304
11305 PROF_START
11306
11307 pt_entry_t *ptep;
11308 pmap_paddr_t pa_page;
11309
11310 // remove page info from start to end
11311 cur_page = arm_trunc_page(start);
11312 end_page = arm_trunc_page(end);
11313 while (cur_page <= end_page) {
11314 p = NULL;
11315
11316 if (pmap == NULL) {
11317 pa_page = cur_page;
11318 } else {
11319 PMAP_LOCK(pmap);
11320 ptep = pmap_pte(pmap, cur_page);
11321 if (ptep == NULL) {
11322 PMAP_UNLOCK(pmap);
11323 goto cont;
11324 }
11325 pa_page = pte_to_pa(*ptep);
11326 PMAP_UNLOCK(pmap);
11327 }
11328
11329 // remove all clones and validate
11330 pmap_pgtrace_remove_all_clone(pa_page);
11331
11332 // find page info and delete
11333 PMAP_PGTRACE_LOCK(&ints);
11334 p = pmap_pgtrace_find_page(pa_page);
11335 if (p != NULL) {
11336 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
11337 ret++;
11338 }
11339 PMAP_PGTRACE_UNLOCK(&ints);
11340
11341 // free outside of locks
11342 if (p != NULL) {
11343 pmap_pgtrace_free_page(p);
11344 }
11345
11346 cont:
11347 // overflow
11348 if (cur_page + ARM_PGBYTES < cur_page) {
11349 break;
11350 } else {
11351 cur_page += ARM_PGBYTES;
11352 }
11353 }
11354
11355 PROF_END
11356
11357 return ret;
11358 }
11359
11360 kern_return_t
11361 pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
11362 {
11363 pt_entry_t *ptep;
11364 pgtrace_run_result_t res;
11365 pmap_pgtrace_page_t *p;
11366 bool ints, found = false;
11367 pmap_paddr_t pa;
11368
11369 // Quick check if we are interested
11370 ptep = pmap_pte(pmap, va);
11371 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
11372 return KERN_FAILURE;
11373 }
11374
11375 PMAP_PGTRACE_LOCK(&ints);
11376
11377 // Check again since access is serialized
11378 ptep = pmap_pte(pmap, va);
11379 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
11380 PMAP_PGTRACE_UNLOCK(&ints);
11381 return KERN_FAILURE;
11382 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
11383 // Somehow this CPU's TLB has not been updated yet
11384 kprintf("%s: this CPU's TLB has not been updated yet?\n", __func__);
11385 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES);
11386
11387 PMAP_PGTRACE_UNLOCK(&ints);
11388 return KERN_SUCCESS;
11389 }
11390
11391 // Find if this pa is what we are tracing
11392 pa = pte_to_pa(*ptep);
11393
11394 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
11395 if (p == NULL) {
11396 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
11397 }
11398
11399 // find if pmap and va are also matching
11400 queue_head_t *mapq = &(p->maps);
11401 queue_head_t *mapwaste = &(p->map_waste);
11402 pmap_pgtrace_map_t *map;
11403
11404 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
11405 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
11406 found = true;
11407 break;
11408 }
11409 }
11410
11411 // if not found, search the waste list, as its entries are still valid
11412 if (!found) {
11413 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
11414 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
11415 found = true;
11416 break;
11417 }
11418 }
11419 }
11420
11421 if (!found) {
11422 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
11423 }
11424
11425 // Decode and run it on the clone map
11426 bzero(&res, sizeof(res));
11427 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
11428 va, map->cva, // fault va and clone page vas
11429 ss, &res);
11430
11431 // write a log if in range
11432 vm_map_offset_t offset = va - map->ova;
11433 if (map->range.start <= offset && offset <= map->range.end) {
11434 pgtrace_write_log(res);
11435 }
11436
11437 PMAP_PGTRACE_UNLOCK(&ints);
11438
11439 // Return to next instruction
11440 set_saved_state_pc(ss, get_saved_state_pc(ss) + sizeof(uint32_t));
11441
11442 return KERN_SUCCESS;
11443 }
11444 #endif
11445
11446 boolean_t
11447 pmap_enforces_execute_only(
11448 #if (__ARM_VMSA__ == 7)
11449 __unused
11450 #endif
11451 pmap_t pmap)
11452 {
11453 #if (__ARM_VMSA__ > 7)
11454 return pmap != kernel_pmap;
11455 #else
11456 return FALSE;
11457 #endif
11458 }
11459
11460 MARK_AS_PMAP_TEXT void
11461 pmap_set_jit_entitled_internal(
11462 __unused pmap_t pmap)
11463 {
11464 return;
11465 }
11466
11467 void
11468 pmap_set_jit_entitled(
11469 pmap_t pmap)
11470 {
11471 pmap_set_jit_entitled_internal(pmap);
11472 }
11473
11474 MARK_AS_PMAP_TEXT static kern_return_t
11475 pmap_query_page_info_internal(
11476 pmap_t pmap,
11477 vm_map_offset_t va,
11478 int *disp_p)
11479 {
11480 pmap_paddr_t pa;
11481 int disp;
11482 int pai;
11483 pt_entry_t *pte;
11484 pv_entry_t **pv_h, *pve_p;
11485
11486 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
11487 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11488 *disp_p = 0;
11489 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11490 return KERN_INVALID_ARGUMENT;
11491 }
11492
11493 disp = 0;
11494
11495 VALIDATE_PMAP(pmap);
11496 PMAP_LOCK(pmap);
11497
11498 pte = pmap_pte(pmap, va);
11499 if (pte == PT_ENTRY_NULL) {
11500 goto done;
11501 }
11502
11503 pa = pte_to_pa(*pte);
11504 if (pa == 0) {
11505 if (ARM_PTE_IS_COMPRESSED(*pte)) {
11506 disp |= PMAP_QUERY_PAGE_COMPRESSED;
11507 if (*pte & ARM_PTE_COMPRESSED_ALT) {
11508 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
11509 }
11510 }
11511 } else {
11512 disp |= PMAP_QUERY_PAGE_PRESENT;
11513 pai = (int) pa_index(pa);
11514 if (!pa_valid(pa)) {
11515 goto done;
11516 }
11517 LOCK_PVH(pai);
11518 pv_h = pai_to_pvh(pai);
11519 pve_p = PV_ENTRY_NULL;
11520 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
11521 pve_p = pvh_list(pv_h);
11522 while (pve_p != PV_ENTRY_NULL &&
11523 pve_get_ptep(pve_p) != pte) {
11524 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
11525 }
11526 }
11527 if (IS_ALTACCT_PAGE(pai, pve_p)) {
11528 disp |= PMAP_QUERY_PAGE_ALTACCT;
11529 } else if (IS_REUSABLE_PAGE(pai)) {
11530 disp |= PMAP_QUERY_PAGE_REUSABLE;
11531 } else if (IS_INTERNAL_PAGE(pai)) {
11532 disp |= PMAP_QUERY_PAGE_INTERNAL;
11533 }
11534 UNLOCK_PVH(pai);
11535 }
11536
11537 done:
11538 PMAP_UNLOCK(pmap);
11539 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11540 *disp_p = disp;
11541 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11542 return KERN_SUCCESS;
11543 }
11544
11545 kern_return_t
11546 pmap_query_page_info(
11547 pmap_t pmap,
11548 vm_map_offset_t va,
11549 int *disp_p)
11550 {
11551 return pmap_query_page_info_internal(pmap, va, disp_p);
11552 }
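/*
 * Hedged usage sketch (the caller is illustrative): the disposition bits
 * returned here distinguish resident, compressed, and accounting states
 * for a single user VA.
 *
 *	int disp = 0;
 *	if (pmap_query_page_info(map->pmap, va, &disp) == KERN_SUCCESS &&
 *	    (disp & PMAP_QUERY_PAGE_COMPRESSED)) {
 *		// page is in the compressor, possibly alt-accounted
 *	}
 */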
11553
11554 MARK_AS_PMAP_TEXT kern_return_t
11555 pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
11556 {
11557
11558 return KERN_SUCCESS;
11559 }
11560
11561 kern_return_t
11562 pmap_return(boolean_t do_panic, boolean_t do_recurse)
11563 {
11564 return pmap_return_internal(do_panic, do_recurse);
11565 }
11566
11567
11568
11569 MARK_AS_PMAP_TEXT static void
11570 pmap_footprint_suspend_internal(
11571 vm_map_t map,
11572 boolean_t suspend)
11573 {
11574 #if DEVELOPMENT || DEBUG
11575 if (suspend) {
11576 current_thread()->pmap_footprint_suspended = TRUE;
11577 map->pmap->footprint_was_suspended = TRUE;
11578 } else {
11579 current_thread()->pmap_footprint_suspended = FALSE;
11580 }
11581 #else /* DEVELOPMENT || DEBUG */
11582 (void) map;
11583 (void) suspend;
11584 #endif /* DEVELOPMENT || DEBUG */
11585 }
11586
11587 void
11588 pmap_footprint_suspend(
11589 vm_map_t map,
11590 boolean_t suspend)
11591 {
11592 pmap_footprint_suspend_internal(map, suspend);
11593 }
11594
11595 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
11596
11597 struct page_table_level_info {
11598 uint64_t size;
11599 uint64_t offmask;
11600 uint64_t shift;
11601 uint64_t index_mask;
11602 uint64_t valid_mask;
11603 uint64_t type_mask;
11604 uint64_t type_block;
11605 };
11606
11607 struct page_table_dump_header {
11608 uint64_t pa;
11609 uint64_t num_entries;
11610 uint64_t start_va;
11611 uint64_t end_va;
11612 };
11613
11614 struct page_table_level_info page_table_levels[] =
11615 { { ARM_TT_L0_SIZE, ARM_TT_L0_OFFMASK, ARM_TT_L0_SHIFT, ARM_TT_L0_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
11616 { ARM_TT_L1_SIZE, ARM_TT_L1_OFFMASK, ARM_TT_L1_SHIFT, ARM_TT_L1_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
11617 { ARM_TT_L2_SIZE, ARM_TT_L2_OFFMASK, ARM_TT_L2_SHIFT, ARM_TT_L2_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
11618 { ARM_TT_L3_SIZE, ARM_TT_L3_OFFMASK, ARM_TT_L3_SHIFT, ARM_TT_L3_INDEX_MASK, ARM_PTE_TYPE_VALID, ARM_PTE_TYPE_MASK, ARM_TTE_TYPE_L3BLOCK } };
11619
11620 static size_t
11621 pmap_dump_page_tables_recurse(const tt_entry_t *ttp,
11622 unsigned int cur_level,
11623 uint64_t start_va,
11624 void *bufp,
11625 void *buf_end)
11626 {
11627 size_t bytes_used = 0;
11628 uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
11629 uint64_t size = page_table_levels[cur_level].size;
11630 uint64_t valid_mask = page_table_levels[cur_level].valid_mask;
11631 uint64_t type_mask = page_table_levels[cur_level].type_mask;
11632 uint64_t type_block = page_table_levels[cur_level].type_block;
11633
11634 if (cur_level == arm64_root_pgtable_level) {
11635 num_entries = arm64_root_pgtable_num_ttes;
11636 }
11637
11638 uint64_t tt_size = num_entries * sizeof(tt_entry_t);
11639 const tt_entry_t *tt_end = &ttp[num_entries];
11640
11641 if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
11642 return 0;
11643 }
11644
11645 struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
11646 header->pa = ml_static_vtop((vm_offset_t)ttp);
11647 header->num_entries = num_entries;
11648 header->start_va = start_va;
11649 header->end_va = start_va + (num_entries * size);
11650
11651 bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
11652 bytes_used += (sizeof(*header) + tt_size);
11653 uint64_t current_va = start_va;
11654
11655 for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
11656 tt_entry_t tte = *ttep;
11657
11658 if (!(tte & valid_mask)) {
11659 continue;
11660 }
11661
11662 if ((tte & type_mask) == type_block) {
11663 continue;
11664 } else {
11665 if (cur_level >= PMAP_TT_MAX_LEVEL) {
11666 panic("%s: corrupt entry %#llx at %p, "
11667 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
11668 __FUNCTION__, tte, ttep,
11669 ttp, cur_level, bufp, buf_end);
11670 }
11671
11672 const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
11673
11674 size_t recurse_result = pmap_dump_page_tables_recurse(next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);
11675
11676 if (recurse_result == 0) {
11677 return 0;
11678 }
11679
11680 bytes_used += recurse_result;
11681 }
11682 }
11683
11684 return bytes_used;
11685 }
11686
11687 size_t
11688 pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
11689 {
11690 if (not_in_kdp) {
11691 panic("pmap_dump_page_tables must only be called from kernel debugger context");
11692 }
11693 return pmap_dump_page_tables_recurse(pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
11694 }
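/*
 * Buffer layout produced above, for consumers (derived from the recursion):
 * a depth-first sequence of records, each record being a
 * struct page_table_dump_header immediately followed by the raw TTE/PTE
 * contents of that table (header.num_entries * sizeof(tt_entry_t) bytes).
 * A return value of 0 means the supplied buffer was too small.
 */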
11695
11696 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
11697
11698 size_t
11699 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
11700 {
11701 return (size_t)-1;
11702 }
11703
11704 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */