[apple/xnu.git] / osfmk / arm / pmap.c (xnu-4903.221.2)
1 /*
2 * Copyright (c) 2011-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38
39 #include <mach/boolean.h>
40 #include <kern/thread.h>
41 #include <kern/sched.h>
42 #include <kern/zalloc.h>
43 #include <kern/kalloc.h>
44 #include <kern/ledger.h>
45 #include <kern/misc_protos.h>
46 #include <kern/spl.h>
47 #include <kern/xpr.h>
48 #include <kern/trustcache.h>
49
50 #include <os/overflow.h>
51
52 #include <vm/pmap.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_protos.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_pageout.h>
59 #include <vm/cpm.h>
60
61 #include <libkern/img4/interface.h>
62 #include <libkern/section_keywords.h>
63
64 #include <machine/atomic.h>
65 #include <machine/thread.h>
66 #include <machine/lowglobals.h>
67
68 #include <arm/caches_internal.h>
69 #include <arm/cpu_data.h>
70 #include <arm/cpu_data_internal.h>
71 #include <arm/cpu_capabilities.h>
72 #include <arm/cpu_number.h>
73 #include <arm/machine_cpu.h>
74 #include <arm/misc_protos.h>
75 #include <arm/trap.h>
76
77 #if (__ARM_VMSA__ > 7)
78 #include <arm64/proc_reg.h>
79 #include <pexpert/arm64/boot.h>
80 #if CONFIG_PGTRACE
81 #include <stdint.h>
82 #include <arm64/pgtrace.h>
83 #if CONFIG_PGTRACE_NONKEXT
84 #include <arm64/pgtrace_decoder.h>
85 #endif // CONFIG_PGTRACE_NONKEXT
86 #endif
87 #endif
88
89 #include <pexpert/device_tree.h>
90
91 #include <san/kasan.h>
92 #include <sys/cdefs.h>
93
94
95 #if MACH_ASSERT
96 int vm_footprint_suspend_allowed = 1;
97
98 extern int pmap_ledgers_panic;
99 extern int pmap_ledgers_panic_leeway;
100
101 int pmap_stats_assert = 1;
102 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
103 MACRO_BEGIN \
104 if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
105 assertf(cond, fmt, ##__VA_ARGS__); \
106 MACRO_END
107 #else /* MACH_ASSERT */
108 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
109 #endif /* MACH_ASSERT */
110
111 #if DEVELOPMENT || DEBUG
112 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
113 (current_thread()->pmap_footprint_suspended)
114 #else /* DEVELOPMENT || DEBUG */
115 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
116 #endif /* DEVELOPMENT || DEBUG */
117
118
119
120 #define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
121 #define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
122
123
124 #if DEVELOPMENT || DEBUG
125 int panic_on_unsigned_execute = 0;
126 #endif /* DEVELOPMENT || DEBUG */
127
128
129 /* Virtual memory region for early allocation */
130 #if (__ARM_VMSA__ == 7)
131 #define VREGION1_START (VM_HIGH_KERNEL_WINDOW & ~ARM_TT_L1_PT_OFFMASK)
132 #else
133 #define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
134 #define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
135 #endif
136 #define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
137
138 extern unsigned int not_in_kdp;
139
140 extern vm_offset_t first_avail;
141
142 extern pmap_paddr_t avail_start;
143 extern pmap_paddr_t avail_end;
144
145 extern vm_offset_t virtual_space_start; /* Next available kernel VA */
146 extern vm_offset_t virtual_space_end; /* End of kernel address space */
147 extern vm_offset_t static_memory_end;
148
149 extern int hard_maxproc;
150
151 #if (__ARM_VMSA__ > 7)
152 /* The number of address bits one TTBR can cover. */
153 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
154
155 /*
156 * The bounds on our TTBRs. These are for sanity checking that
157 * an address is accessible by a TTBR before we attempt to map it.
158 */
159 #define ARM64_TTBR0_MIN_ADDR (0ULL)
160 #define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
161 #define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
162 #define ARM64_TTBR1_MAX_ADDR (~0ULL)
163
164 /* The level of the root of a page table. */
165 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
166
167 /* The number of entries in the root TT of a page table. */
168 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
169 #else
170 const uint64_t arm64_root_pgtable_level = 0;
171 const uint64_t arm64_root_pgtable_num_ttes = 0;
172 #endif
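/*
 * Editor's note: an illustrative, self-contained sketch of the arithmetic behind
 * arm64_root_pgtable_level and arm64_root_pgtable_num_ttes above.  The concrete
 * values used here (T0SZ_BOOT = 25, ARM_PGSHIFT = 14, TTE_SHIFT = 3) are example
 * assumptions for a 39-bit / 16KB-granule configuration, not necessarily what any
 * particular xnu build uses.
 */
#include <stdio.h>

int main(void)
{
        const unsigned long long t0sz_boot = 25;   /* assumed: 64 - 25 = 39 VA bits */
        const unsigned long long pgshift   = 14;   /* assumed: 16KB translation granule */
        const unsigned long long tte_shift = 3;    /* 8-byte translation table entries */

        unsigned long long addr_bits      = 64 - t0sz_boot;        /* PGTABLE_ADDR_BITS */
        unsigned long long bits_per_level = pgshift - tte_shift;   /* 11 index bits per level */

        /* Same expressions as the kernel constants above. */
        unsigned long long root_level = 3 - ((addr_bits - 1 - pgshift) / bits_per_level);
        unsigned long long root_ttes  = 2ULL << ((addr_bits - 1 - pgshift) % bits_per_level);

        /* Prints "root level 1, 8 root TTEs" for the assumed 39-bit/16KB case. */
        printf("root level %llu, %llu root TTEs\n", root_level, root_ttes);
        return 0;
}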
173
174 struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
175 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
176
177 struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* store pt pages */
178 vm_object_t pmap_object = &pmap_object_store;
179
180 static struct zone *pmap_zone; /* zone of pmap structures */
181
182 decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA)
183 unsigned int pmap_stamp MARK_AS_PMAP_DATA;
184 queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
185
186 decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA)
187 queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
188
189 decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA)
190
191 typedef struct page_free_entry {
192 struct page_free_entry *next;
193 } page_free_entry_t;
194
195 #define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
196
197 page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
198 unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
199 unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
200
201
202 typedef struct tt_free_entry {
203 struct tt_free_entry *next;
204 } tt_free_entry_t;
205
206 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
207
208 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
209 unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
210 unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
211 #define FREE_PAGE_SIZE_TT_MAX 4
212 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
213 unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
214 unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
215 #define FREE_TWO_PAGE_SIZE_TT_MAX 4
216 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
217 unsigned int free_tt_count MARK_AS_PMAP_DATA;
218 unsigned int free_tt_max MARK_AS_PMAP_DATA;
219
220 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
221
222 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
223 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
224 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
225
226 unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
227 unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
228 unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
229 unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
230 unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
231 unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
232 unsigned int inuse_pmap_pages_count = 0; /* debugging */
233
234 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
235 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
236
237 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
238 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
239
240 #if DEVELOPMENT || DEBUG
241 int nx_enabled = 1; /* enable no-execute protection */
242 int allow_data_exec = 0; /* No apps may execute data */
243 int allow_stack_exec = 0; /* No apps may execute from the stack */
244 #else /* DEVELOPMENT || DEBUG */
245 const int nx_enabled = 1; /* enable no-execute protection */
246 const int allow_data_exec = 0; /* No apps may execute data */
247 const int allow_stack_exec = 0; /* No apps may execute from the stack */
248 #endif /* DEVELOPMENT || DEBUG */
249
250 /*
251 * pv_entry_t - structure to track the active mappings for a given page
252 */
253 typedef struct pv_entry {
254 struct pv_entry *pve_next; /* next alias */
255 pt_entry_t *pve_ptep; /* page table entry */
256 #if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
257 /* For the newer ARMv7k ABI where 64-bit types are 64-bit aligned but
258 * pointers are 32-bit:
259 * pt_desc is 64-bit aligned and we often cast from pv_entry to pt_desc,
260 * so force 8-byte alignment here.
261 */
262 } __attribute__ ((aligned(8))) pv_entry_t;
263 #else
264 } pv_entry_t;
265 #endif
266
267 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
268
269 /*
270 * PMAP LEDGERS:
271 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
272 * as a marker for pages mapped through an "alternate accounting" mapping.
273 * These macros set, clear and test for this marker and extract the actual
274 * value of the "pve_next" pointer.
275 */
276 #define PVE_NEXT_ALTACCT ((uintptr_t) 0x1)
277 #define PVE_NEXT_SET_ALTACCT(pve_next_p) \
278 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
279 PVE_NEXT_ALTACCT)
280 #define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
281 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
282 ~PVE_NEXT_ALTACCT)
283 #define PVE_NEXT_IS_ALTACCT(pve_next) \
284 ((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
285 #define PVE_NEXT_PTR(pve_next) \
286 ((struct pv_entry *)(((uintptr_t) (pve_next)) & \
287 ~PVE_NEXT_ALTACCT))
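/*
 * Editor's note: a minimal user-space model of the pointer-tagging scheme used by
 * the PVE_NEXT_* macros above.  Because pv_entry structures are at least 4-byte
 * aligned, the least significant bit of "pve_next" is always zero and can carry
 * the "alternate accounting" marker without disturbing the pointer value that
 * PVE_NEXT_PTR() recovers.  The struct and names here are stand-ins, not xnu code.
 */
#include <assert.h>
#include <stdint.h>

struct node { struct node *next; };

#define ALTACCT_BIT ((uintptr_t)0x1)

int main(void)
{
        struct node a, b;

        a.next = &b;                                                     /* plain link */
        a.next = (struct node *)((uintptr_t)a.next | ALTACCT_BIT);       /* SET_ALTACCT */

        assert(((uintptr_t)a.next & ALTACCT_BIT) != 0);                  /* IS_ALTACCT */
        assert((struct node *)((uintptr_t)a.next & ~ALTACCT_BIT) == &b); /* PVE_NEXT_PTR */

        a.next = (struct node *)((uintptr_t)a.next & ~ALTACCT_BIT);      /* CLR_ALTACCT */
        assert(a.next == &b);
        return 0;
}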
288 #if MACH_ASSERT
289 static void pmap_check_ledgers(pmap_t pmap);
290 #else
291 static inline void pmap_check_ledgers(__unused pmap_t pmap) {}
292 #endif /* MACH_ASSERT */
293
294 SECURITY_READ_ONLY_LATE(pv_entry_t **) pv_head_table; /* array of pv entry pointers */
295
296 pv_entry_t *pv_free_list MARK_AS_PMAP_DATA;
297 pv_entry_t *pv_kern_free_list MARK_AS_PMAP_DATA;
298 decl_simple_lock_data(,pv_free_list_lock MARK_AS_PMAP_DATA)
299 decl_simple_lock_data(,pv_kern_free_list_lock MARK_AS_PMAP_DATA)
300
301 decl_simple_lock_data(,phys_backup_lock)
302
303 /*
304 * pt_desc - structure to keep info on page assigned to page tables
305 */
306 #if (__ARM_VMSA__ == 7)
307 #define PT_INDEX_MAX 1
308 #else
309 #if (ARM_PGSHIFT == 14)
310 #define PT_INDEX_MAX 1
311 #else
312 #define PT_INDEX_MAX 4
313 #endif
314 #endif
315
316 #define PT_DESC_REFCOUNT 0x4000U
317 #define PT_DESC_IOMMU_REFCOUNT 0x8000U
318
319 typedef struct pt_desc {
320 queue_chain_t pt_page;
321 struct {
322 /*
323 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
324 * For leaf pagetables, should reflect the number of non-empty PTEs
325 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
326 */
327 unsigned short refcnt;
328 /*
329 * For non-leaf pagetables, should be 0
330 * For leaf pagetables, should reflect the number of wired entries
331 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
332 */
333 unsigned short wiredcnt;
334 } pt_cnt[PT_INDEX_MAX];
335 union {
336 struct pmap *pmap;
337 };
338 struct {
339 vm_offset_t va;
340 } pt_map[PT_INDEX_MAX];
341 } pt_desc_t;
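/*
 * Editor's note: an illustrative helper (not part of xnu) showing how the refcnt
 * field documented above distinguishes the three kinds of page a pt_desc can
 * describe.  The constants mirror PT_DESC_REFCOUNT and PT_DESC_IOMMU_REFCOUNT.
 */
#include <stdio.h>

enum ptd_kind { PTD_NONLEAF, PTD_IOMMU, PTD_LEAF };

static enum ptd_kind ptd_classify(unsigned short refcnt)
{
        if (refcnt == 0x4000U)          /* PT_DESC_REFCOUNT: non-leaf page table */
                return PTD_NONLEAF;
        if (refcnt == 0x8000U)          /* PT_DESC_IOMMU_REFCOUNT: IOMMU-owned page */
                return PTD_IOMMU;
        return PTD_LEAF;                /* leaf table: refcnt counts non-empty PTEs */
}

int main(void)
{
        printf("%d %d %d\n", ptd_classify(0x4000U), ptd_classify(0x8000U), ptd_classify(37));
        return 0;
}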
342
343
344 #define PTD_ENTRY_NULL ((pt_desc_t *) 0)
345
346 SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
347
348 pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
349 SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
350 unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
351 decl_simple_lock_data(,ptd_free_list_lock MARK_AS_PMAP_DATA)
352
353 /*
354 * physical page attribute
355 */
356 typedef u_int16_t pp_attr_t;
357
358 #define PP_ATTR_WIMG_MASK 0x003F
359 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
360
361 #define PP_ATTR_REFERENCED 0x0040
362 #define PP_ATTR_MODIFIED 0x0080
363
364 #define PP_ATTR_INTERNAL 0x0100
365 #define PP_ATTR_REUSABLE 0x0200
366 #define PP_ATTR_ALTACCT 0x0400
367 #define PP_ATTR_NOENCRYPT 0x0800
368
369 #define PP_ATTR_REFFAULT 0x1000
370 #define PP_ATTR_MODFAULT 0x2000
371
372
373 SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
374
375 typedef struct pmap_io_range
376 {
377 uint64_t addr;
378 uint32_t len;
379 uint32_t wimg; // treated as pp_attr_t
380 } __attribute__((packed)) pmap_io_range_t;
381
382 SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table;
383
384 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
385 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
386
387 SECURITY_READ_ONLY_LATE(pmap_paddr_t) io_rgn_start = 0;
388 SECURITY_READ_ONLY_LATE(pmap_paddr_t) io_rgn_end = 0;
389 SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
390
391 SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
392
393 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
394 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;
395
396 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
397 #if defined(__arm64__)
398 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
399 #endif
400
401 /* free address spaces (1 means free) */
402 static uint32_t asid_bitmap[MAX_ASID / (sizeof(uint32_t) * NBBY)] MARK_AS_PMAP_DATA;
403
404 #if (__ARM_VMSA__ > 7)
405 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
406 #endif
407
408
409 #define pa_index(pa) \
410 (atop((pa) - vm_first_phys))
411
412 #define pai_to_pvh(pai) \
413 (&pv_head_table[pai])
414
415 #define pa_valid(x) \
416 ((x) >= vm_first_phys && (x) < vm_last_phys)
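/*
 * Editor's note: a small model (not xnu code) of the lookup chain implied by
 * pa_valid()/pa_index()/pai_to_pvh() above: a managed physical address is
 * range-checked against [vm_first_phys, vm_last_phys), converted to a page index,
 * and that index selects the per-page pv_head_table and pp_attr_table entries.
 * The page size and DRAM bounds below are assumptions for illustration only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT 14ULL                              /* assumed 16KB pages */
static const uint64_t ex_first_phys = 0x800000000ULL;    /* assumed DRAM base */
static const uint64_t ex_last_phys  = 0x880000000ULL;    /* assumed DRAM end  */

static bool ex_pa_valid(uint64_t pa)
{
        return pa >= ex_first_phys && pa < ex_last_phys;
}

static uint64_t ex_pa_index(uint64_t pa)                 /* atop(pa - vm_first_phys) */
{
        return (pa - ex_first_phys) >> EX_PAGE_SHIFT;
}

int main(void)
{
        uint64_t pa = 0x800004000ULL;
        if (ex_pa_valid(pa))
                printf("pai %llu selects pv_head_table[pai] / pp_attr_table[pai]\n",
                    (unsigned long long)ex_pa_index(pa));
        return 0;
}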
417
418 /* PTE Define Macros */
419
420 #define pte_is_wired(pte) \
421 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
422
423 #define pte_set_wired(ptep, wired) \
424 do { \
425 SInt16 *ptd_wiredcnt_ptr; \
426 ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->pt_cnt[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
427 if (wired) { \
428 *ptep |= ARM_PTE_WIRED; \
429 OSAddAtomic16(1, ptd_wiredcnt_ptr); \
430 } else { \
431 *ptep &= ~ARM_PTE_WIRED; \
432 OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
433 } \
434 } while(0)
435
436 #define pte_is_ffr(pte) \
437 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
438
439 #define pte_set_ffr(pte, ffr) \
440 do { \
441 if (ffr) { \
442 pte |= ARM_PTE_WRITEABLE; \
443 } else { \
444 pte &= ~ARM_PTE_WRITEABLE; \
445 } \
446 } while(0)
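/*
 * Editor's note: a self-contained sketch (not xnu code) of the bookkeeping pattern
 * used by pte_set_wired() above: flipping the wired bit in a PTE is always paired
 * with an atomic +/-1 on the owning page table's wired count, so the count stays
 * consistent with the PTEs it summarizes.  The PTE bit used here is a placeholder,
 * not the real ARM_PTE_WIRED encoding.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PTE_WIRED (1ULL << 10)      /* placeholder bit for illustration */

static void ex_pte_set_wired(uint64_t *ptep, atomic_short *wiredcnt, int wired)
{
        if (wired) {
                *ptep |= EX_PTE_WIRED;
                atomic_fetch_add(wiredcnt, 1);
        } else {
                *ptep &= ~EX_PTE_WIRED;
                atomic_fetch_sub(wiredcnt, 1);
        }
}

int main(void)
{
        uint64_t pte = 0;
        atomic_short wiredcnt = 0;

        ex_pte_set_wired(&pte, &wiredcnt, 1);
        ex_pte_set_wired(&pte, &wiredcnt, 0);
        printf("pte=%llx wiredcnt=%d\n", (unsigned long long)pte, (int)atomic_load(&wiredcnt));
        return 0;
}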
447
448 /* PVE Define Macros */
449
450 #define pve_next(pve) \
451 ((pve)->pve_next)
452
453 #define pve_link_field(pve) \
454 (&pve_next(pve))
455
456 #define pve_link(pp, e) \
457 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
458
459 #define pve_unlink(pp, e) \
460 (pve_next(pp) = pve_next(e))
461
462 /* bits held in the ptep pointer field */
463
464 #define pve_get_ptep(pve) \
465 ((pve)->pve_ptep)
466
467 #define pve_set_ptep(pve, ptep_new) \
468 do { \
469 (pve)->pve_ptep = (ptep_new); \
470 } while (0)
471
472 /* PTEP Define Macros */
473
474 #if (__ARM_VMSA__ == 7)
475
476 #define ARM_PT_DESC_INDEX_MASK 0x00000
477 #define ARM_PT_DESC_INDEX_SHIFT 0
478
479 /*
480 * mask for page descriptor index: 4MB per page table
481 */
482 #define ARM_TT_PT_INDEX_MASK 0xfffU /* mask for page descriptor index: 4MB per page table */
483
484 /*
485 * Shift value used for reconstructing the virtual address for a PTE.
486 */
487 #define ARM_TT_PT_ADDR_SHIFT (10U)
488
489 #define ptep_get_va(ptep) \
490 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~0xFFF))))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
491
492 #define ptep_get_pmap(ptep) \
493 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~0xFFF))))))))->pmap))
494
495 #else
496
497 #if (ARM_PGSHIFT == 12)
498 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
499 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
500 /*
501 * mask for page descriptor index: 2MB per page table
502 */
503 #define ARM_TT_PT_INDEX_MASK (0x0fffULL)
504 /*
505 * Shift value used for reconstructing the virtual address for a PTE.
506 */
507 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
508
509 /* TODO: Give this a better name/documentation than "other" */
510 #define ARM_TT_PT_OTHER_MASK (0x0fffULL)
511
512 #else
513
514 #define ARM_PT_DESC_INDEX_MASK (0x00000)
515 #define ARM_PT_DESC_INDEX_SHIFT (0)
516 /*
517 * mask for page descriptor index: 32MB per page table
518 */
519 #define ARM_TT_PT_INDEX_MASK (0x3fffULL)
520 /*
521 * Shift value used for reconstructing the virtual address for a PTE.
522 */
523 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
524
525 /* TODO: Give this a better name/documentation than "other" */
526 #define ARM_TT_PT_OTHER_MASK (0x3fffULL)
527 #endif
528
529 #define ARM_PT_DESC_INDEX(ptep) \
530 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
531
532 #define ptep_get_va(ptep) \
533 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK))))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
534
535 #define ptep_get_pmap(ptep) \
536 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK))))))))->pmap))
537
538 #endif
539
540 #define ARM_PT_DESC_INDEX(ptep) \
541 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
542
543 #define ptep_get_ptd(ptep) \
544 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
545
546
547 /* PVH Define Macros */
548
549 /* pvhead type */
550 #define PVH_TYPE_NULL 0x0UL
551 #define PVH_TYPE_PVEP 0x1UL
552 #define PVH_TYPE_PTEP 0x2UL
553 #define PVH_TYPE_PTDP 0x3UL
554
555 #define PVH_TYPE_MASK (0x3UL)
556
557 #ifdef __arm64__
558
559 #define PVH_FLAG_IOMMU 0x4UL
560 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63)
561 #define PVH_FLAG_CPU (1ULL << 62)
562 #define PVH_LOCK_BIT 61
563 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
564 #define PVH_FLAG_EXEC (1ULL << 60)
565 #define PVH_FLAG_LOCKDOWN (1ULL << 59)
566 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)
567
568 #else /* !__arm64__ */
569
570 #define PVH_LOCK_BIT 31
571 #define PVH_FLAG_LOCK (1UL << PVH_LOCK_BIT)
572 #define PVH_HIGH_FLAGS PVH_FLAG_LOCK
573
574 #endif
575
576 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
577
578 #define pvh_test_type(h, b) \
579 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
580
581 #define pvh_ptep(h) \
582 ((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
583
584 #define pvh_list(h) \
585 ((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
586
587 #define pvh_get_flags(h) \
588 (*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
589
590 #define pvh_set_flags(h, f) \
591 do { \
592 __c11_atomic_store((_Atomic vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
593 memory_order_relaxed); \
594 } while (0)
595
596 #define pvh_update_head(h, e, t) \
597 do { \
598 assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
599 __c11_atomic_store((_Atomic vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
600 memory_order_relaxed); \
601 } while (0)
602
603 #define pvh_update_head_unlocked(h, e, t) \
604 do { \
605 assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
606 *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
607 } while (0)
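/*
 * Editor's note: an illustrative decoder (not xnu code) for the arm64 pv_head_table
 * entry layout defined above: the low two bits carry the PVH_TYPE_* tag, bits 59-62
 * carry PVH_HIGH_FLAGS metadata, and the rest is the pointer payload.  pvh_ptep()
 * and pvh_list() OR the high flags back in because kernel pointers have those bits
 * set; the sample VA below is made up purely for illustration.
 */
#include <assert.h>
#include <stdint.h>

#define EX_TYPE_MASK   0x3ULL
#define EX_FLAG_CPU    (1ULL << 62)
#define EX_HIGH_FLAGS  ((1ULL << 62) | (1ULL << 61) | (1ULL << 60) | (1ULL << 59))
#define EX_LIST_MASK   (~EX_TYPE_MASK)

int main(void)
{
        uint64_t ptep_va = 0xFFFFFFE012345678ULL;   /* made-up kernel VA of a PTE */

        /* Store: drop the high flag bits, then tag with PVH_TYPE_PTEP (0x2) and a flag. */
        uint64_t pvh = (ptep_va & ~EX_HIGH_FLAGS & EX_LIST_MASK) | EX_FLAG_CPU | 0x2ULL;

        /* Decode, as pvh_test_type()/pvh_get_flags()/pvh_ptep() do. */
        assert((pvh & EX_TYPE_MASK) == 0x2ULL);                     /* type tag      */
        assert((pvh & EX_HIGH_FLAGS) == EX_FLAG_CPU);               /* flag bits     */
        assert(((pvh & EX_LIST_MASK) | EX_HIGH_FLAGS) == ptep_va);  /* pointer back  */
        return 0;
}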
608
609 #define pvh_add(h, e) \
610 do { \
611 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
612 pve_next(e) = pvh_list(h); \
613 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
614 } while (0)
615
616 #define pvh_remove(h, p, e) \
617 do { \
618 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
619 if ((p) == (h)) { \
620 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
621 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
622 } else { \
623 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
624 } \
625 } else { \
626 /* \
627 * PMAP LEDGERS: \
628 * preserve the "alternate accounting" bit \
629 * when updating "p" (the previous entry's \
630 * "pve_next"). \
631 */ \
632 boolean_t __is_altacct; \
633 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
634 *(p) = PVE_NEXT_PTR(pve_next((e))); \
635 if (__is_altacct) { \
636 PVE_NEXT_SET_ALTACCT((p)); \
637 } else { \
638 PVE_NEXT_CLR_ALTACCT((p)); \
639 } \
640 } \
641 } while (0)
642
643
644 /* PPATTR Define Macros */
645
646 #define ppattr_set_bits(h, b) \
647 do { \
648 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
649 } while (0)
650
651 #define ppattr_clear_bits(h, b) \
652 do { \
653 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
654 } while (0)
655
656 #define ppattr_test_bits(h, b) \
657 ((*(pp_attr_t *)(h) & (b)) == (b))
658
659 #define pa_set_bits(x, b) \
660 do { \
661 if (pa_valid(x)) \
662 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
663 (b)); \
664 } while (0)
665
666 #define pa_test_bits(x, b) \
667 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
668 (b)) : FALSE)
669
670 #define pa_clear_bits(x, b) \
671 do { \
672 if (pa_valid(x)) \
673 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
674 (b)); \
675 } while (0)
676
677 #define pa_set_modify(x) \
678 pa_set_bits(x, PP_ATTR_MODIFIED)
679
680 #define pa_clear_modify(x) \
681 pa_clear_bits(x, PP_ATTR_MODIFIED)
682
683 #define pa_set_reference(x) \
684 pa_set_bits(x, PP_ATTR_REFERENCED)
685
686 #define pa_clear_reference(x) \
687 pa_clear_bits(x, PP_ATTR_REFERENCED)
688
689
690 #define IS_INTERNAL_PAGE(pai) \
691 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
692 #define SET_INTERNAL_PAGE(pai) \
693 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
694 #define CLR_INTERNAL_PAGE(pai) \
695 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
696
697 #define IS_REUSABLE_PAGE(pai) \
698 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
699 #define SET_REUSABLE_PAGE(pai) \
700 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
701 #define CLR_REUSABLE_PAGE(pai) \
702 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
703
704 #define IS_ALTACCT_PAGE(pai, pve_p) \
705 (((pve_p) == NULL) \
706 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
707 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
708 #define SET_ALTACCT_PAGE(pai, pve_p) \
709 if ((pve_p) == NULL) { \
710 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
711 } else { \
712 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
713 }
714 #define CLR_ALTACCT_PAGE(pai, pve_p) \
715 if ((pve_p) == NULL) { \
716 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
717 } else { \
718 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
719 }
720
721 #define IS_REFFAULT_PAGE(pai) \
722 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
723 #define SET_REFFAULT_PAGE(pai) \
724 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
725 #define CLR_REFFAULT_PAGE(pai) \
726 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
727
728 #define IS_MODFAULT_PAGE(pai) \
729 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
730 #define SET_MODFAULT_PAGE(pai) \
731 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
732 #define CLR_MODFAULT_PAGE(pai) \
733 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
734
735 #define tte_get_ptd(tte) \
736 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
737
738
739 #if (__ARM_VMSA__ == 7)
740
741 #define tte_index(pmap, addr) \
742 ttenum((addr))
743
744 #else
745
746 #define tt0_index(pmap, addr) \
747 (((addr) & ARM_TT_L0_INDEX_MASK) >> ARM_TT_L0_SHIFT)
748
749 #define tt1_index(pmap, addr) \
750 (((addr) & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT)
751
752 #define tt2_index(pmap, addr) \
753 (((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)
754
755 #define tt3_index(pmap, addr) \
756 (((addr) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)
757
758 #define tte_index(pmap, addr) \
759 (((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)
760
761 #endif
762
763 /*
764 * Lock on pmap system
765 */
766
767 #define PMAP_LOCK_INIT(pmap) { \
768 simple_lock_init(&(pmap)->lock, 0); \
769 }
770
771 #define PMAP_LOCK(pmap) { \
772 pmap_simple_lock(&(pmap)->lock); \
773 }
774
775 #define PMAP_UNLOCK(pmap) { \
776 pmap_simple_unlock(&(pmap)->lock); \
777 }
778
779 #if MACH_ASSERT
780 #define PMAP_ASSERT_LOCKED(pmap) { \
781 simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
782 }
783 #else
784 #define PMAP_ASSERT_LOCKED(pmap)
785 #endif
786
787 #if defined(__arm64__)
788 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
789 #else
790 #define PVH_LOCK_WORD 0
791 #endif
792
793 #define ASSERT_PVH_LOCKED(index) \
794 do { \
795 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
796 } while (0)
797
798 #define LOCK_PVH(index) \
799 do { \
800 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
801 } while (0)
802
803 #define UNLOCK_PVH(index) \
804 do { \
805 ASSERT_PVH_LOCKED(index); \
806 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
807 } while (0)
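/*
 * Editor's note: a small demonstration (not xnu code) of why PVH_LOCK_WORD is 1 on
 * little-endian arm64.  The bit-lock primitives operate on 32-bit words, and bit 61
 * of the 64-bit pv head entry lands in word 1 at bit position 61 - 32 = 29.  The
 * union-based type punning below assumes a little-endian host.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
        union { uint64_t u64; uint32_t u32[2]; } entry = { 0 };

        entry.u32[1] |= 1u << (61 - 32);       /* what LOCK_PVH effectively sets */
        assert(entry.u64 & (1ULL << 61));      /* ...is PVH_FLAG_LOCK in the 64-bit view */
        return 0;
}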
808
809 #define PMAP_UPDATE_TLBS(pmap, s, e) { \
810 flush_mmu_tlb_region_asid_async(s, (unsigned)(e - s), pmap); \
811 sync_tlb_flush(); \
812 }
813
814 #ifdef __ARM_L1_PTW__
815
816 #define FLUSH_PTE_RANGE(spte, epte) \
817 __builtin_arm_dmb(DMB_ISH);
818
819 #define FLUSH_PTE(pte_p) \
820 __builtin_arm_dmb(DMB_ISH);
821
822 #define FLUSH_PTE_STRONG(pte_p) \
823 __builtin_arm_dsb(DSB_ISH);
824
825 #define FLUSH_PTE_RANGE_STRONG(spte, epte) \
826 __builtin_arm_dsb(DSB_ISH);
827
828 #else /* __ARM_L1_PTW */
829
830 #define FLUSH_PTE_RANGE(spte, epte) \
831 CleanPoU_DcacheRegion((vm_offset_t)spte, \
832 (vm_offset_t)epte - (vm_offset_t)spte);
833
834 #define FLUSH_PTE(pte_p) \
835 __unreachable_ok_push \
836 if (TEST_PAGE_RATIO_4) \
837 FLUSH_PTE_RANGE((pte_p), (pte_p) + 4); \
838 else \
839 FLUSH_PTE_RANGE((pte_p), (pte_p) + 1); \
840 CleanPoU_DcacheRegion((vm_offset_t)pte_p, sizeof(pt_entry_t)); \
841 __unreachable_ok_pop
842
843 #define FLUSH_PTE_STRONG(pte_p) FLUSH_PTE(pte_p)
844
845 #define FLUSH_PTE_RANGE_STRONG(spte, epte) FLUSH_PTE_RANGE(spte, epte)
846
847 #endif /* !defined(__ARM_L1_PTW) */
848
849 #define WRITE_PTE_FAST(pte_p, pte_entry) \
850 __unreachable_ok_push \
851 if (TEST_PAGE_RATIO_4) { \
852 if (((unsigned)(pte_p)) & 0x1f) \
853 panic("WRITE_PTE\n"); \
854 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
855 *(pte_p) = (pte_entry); \
856 *((pte_p)+1) = (pte_entry); \
857 *((pte_p)+2) = (pte_entry); \
858 *((pte_p)+3) = (pte_entry); \
859 } else { \
860 *(pte_p) = (pte_entry); \
861 *((pte_p)+1) = (pte_entry) | 0x1000; \
862 *((pte_p)+2) = (pte_entry) | 0x2000; \
863 *((pte_p)+3) = (pte_entry) | 0x3000; \
864 } \
865 } else { \
866 *(pte_p) = (pte_entry); \
867 } \
868 __unreachable_ok_pop
869
870 #define WRITE_PTE(pte_p, pte_entry) \
871 WRITE_PTE_FAST(pte_p, pte_entry); \
872 FLUSH_PTE(pte_p);
873
874 #define WRITE_PTE_STRONG(pte_p, pte_entry) \
875 WRITE_PTE_FAST(pte_p, pte_entry); \
876 FLUSH_PTE_STRONG(pte_p);
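/*
 * Editor's note: an illustrative model (not xnu code) of the 4:1 replication done by
 * WRITE_PTE_FAST() above when the VM page size is 16KB but the hardware translation
 * granule is 4KB: one logical mapping fills four consecutive PTEs whose physical
 * addresses step by 0x1000, while empty/compressed template values are replicated
 * unchanged.  The empty check is simplified relative to ARM_PTE_COMPRESSED_MASK.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_EMPTY 0ULL

static void ex_write_pte_ratio4(uint64_t *pte_p, uint64_t pte_entry)
{
        if (pte_entry == EX_EMPTY) {
                for (int i = 0; i < 4; i++)
                        pte_p[i] = pte_entry;                       /* replicate template as-is */
        } else {
                for (int i = 0; i < 4; i++)
                        pte_p[i] = pte_entry | ((uint64_t)i << 12); /* +0x1000 per 4KB subpage */
        }
}

int main(void)
{
        uint64_t ptes[4];
        ex_write_pte_ratio4(ptes, 0x0000000812345000ULL /* made-up PA | attrs */);
        for (int i = 0; i < 4; i++)
                printf("pte[%d] = 0x%llx\n", i, (unsigned long long)ptes[i]);
        return 0;
}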
877
878 /*
879 * Other useful macros.
880 */
881 #define current_pmap() \
882 (vm_map_pmap(current_thread()->map))
883
884
885 #define VALIDATE_USER_PMAP(x)
886 #define VALIDATE_PMAP(x)
887 #define VALIDATE_LEDGER(x)
888
889
890 #if DEVELOPMENT || DEBUG
891
892 /*
893 * Trace levels are controlled by a bitmask in which each
894 * level can be enabled/disabled by the (1<<level) position
895 * in the boot arg
896 * Level 1: pmap lifecycle (create/destroy/switch)
897 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
898 * Level 3: internal state management (tte/attributes/fast-fault)
899 */
900
901 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
902
903 #define PMAP_TRACE(level, ...) \
904 if (__improbable((1 << (level)) & pmap_trace_mask)) { \
905 KDBG_RELEASE(__VA_ARGS__); \
906 }
907 #else
908
909 #define PMAP_TRACE(level, ...)
910
911 #endif
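/*
 * Editor's note: a quick illustration of the trace-mask encoding described above.
 * For example, a boot-arg value of 0x6 enables levels 1 and 2 (pmap and mapping
 * lifecycle) while leaving level 3 (internal state management) disabled.
 */
#include <stdio.h>

int main(void)
{
        unsigned int pmap_trace_mask = (1u << 1) | (1u << 2);   /* = 0x6 */

        for (unsigned int level = 1; level <= 3; level++)
                printf("level %u %s\n", level,
                    ((1u << level) & pmap_trace_mask) ? "enabled" : "disabled");
        return 0;
}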
912
913
914 /*
915 * Internal function prototypes (forward declarations).
916 */
917
918 static void pv_init(
919 void);
920
921 static boolean_t pv_alloc(
922 pmap_t pmap,
923 unsigned int pai,
924 pv_entry_t **pvepp);
925
926 static void pv_free(
927 pv_entry_t *pvep);
928
929 static void pv_list_free(
930 pv_entry_t *pvehp,
931 pv_entry_t *pvetp,
932 unsigned int cnt);
933
934 static void ptd_bootstrap(
935 pt_desc_t *ptdp, unsigned int ptd_cnt);
936
937 static inline pt_desc_t *ptd_alloc_unlinked(void);
938
939 static pt_desc_t *ptd_alloc(pmap_t pmap);
940
941 static void ptd_deallocate(pt_desc_t *ptdp);
942
943 static void ptd_init(
944 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
945
946 static void pmap_zone_init(
947 void);
948
949 static void pmap_set_reference(
950 ppnum_t pn);
951
952 ppnum_t pmap_vtophys(
953 pmap_t pmap, addr64_t va);
954
955 void pmap_switch_user_ttb(
956 pmap_t pmap);
957
958 static void flush_mmu_tlb_region_asid_async(
959 vm_offset_t va, unsigned length, pmap_t pmap);
960
961 static kern_return_t pmap_expand(
962 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
963
964 static int pmap_remove_range(
965 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
966
967 static int pmap_remove_range_options(
968 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, int);
969
970 static tt_entry_t *pmap_tt1_allocate(
971 pmap_t, vm_size_t, unsigned int);
972
973 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
974
975 static void pmap_tt1_deallocate(
976 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
977
978 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
979
980 static kern_return_t pmap_tt_allocate(
981 pmap_t, tt_entry_t **, unsigned int, unsigned int);
982
983 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
984
985 static void pmap_tte_deallocate(
986 pmap_t, tt_entry_t *, unsigned int);
987
988 #define PMAP_TT_L1_LEVEL 0x1
989 #define PMAP_TT_L2_LEVEL 0x2
990 #define PMAP_TT_L3_LEVEL 0x3
991 #if (__ARM_VMSA__ == 7)
992 #define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
993 #else
994 #define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
995 #endif
996
997 #ifdef __ARM64_PMAP_SUBPAGE_L1__
998 #if (__ARM_VMSA__ <= 7)
999 #error This is not supported for old-style page tables
1000 #endif
1001 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
1002 #else
1003 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
1004 #endif
1005
1006 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
1007 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
1008 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
1009
1010 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1011
1012 #if (__ARM_VMSA__ > 7)
1013
1014 static inline tt_entry_t *pmap_tt1e(
1015 pmap_t, vm_map_address_t);
1016
1017 static inline tt_entry_t *pmap_tt2e(
1018 pmap_t, vm_map_address_t);
1019
1020 static inline pt_entry_t *pmap_tt3e(
1021 pmap_t, vm_map_address_t);
1022
1023 static void pmap_unmap_sharedpage(
1024 pmap_t pmap);
1025
1026 static boolean_t
1027 pmap_is_64bit(pmap_t);
1028
1029
1030 #endif
1031 static inline tt_entry_t *pmap_tte(
1032 pmap_t, vm_map_address_t);
1033
1034 static inline pt_entry_t *pmap_pte(
1035 pmap_t, vm_map_address_t);
1036
1037 static void pmap_update_cache_attributes_locked(
1038 ppnum_t, unsigned);
1039
1040 boolean_t arm_clear_fast_fault(
1041 ppnum_t ppnum,
1042 vm_prot_t fault_type);
1043
1044 static pmap_paddr_t pmap_pages_reclaim(
1045 void);
1046
1047 static kern_return_t pmap_pages_alloc(
1048 pmap_paddr_t *pa,
1049 unsigned size,
1050 unsigned option);
1051
1052 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1053 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1054
1055 static void pmap_pages_free(
1056 pmap_paddr_t pa,
1057 unsigned size);
1058
1059 static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
1060
1061 static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
1062
1063
1064 static void pmap_trim_self(pmap_t pmap);
1065 static void pmap_trim_subord(pmap_t subord);
1066
1067 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1068 static __return_type __function_name##_internal __function_args;
1069
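/*
 * Editor's note: with the definition above, each PMAP_SUPPORT_PROTOTYPES()
 * invocation below simply declares the "_internal" flavor of the named function;
 * the __function_index argument is unused by this definition.  For example, the
 * first invocation expands to:
 *
 *   static kern_return_t arm_fast_fault_internal(pmap_t pmap,
 *       vm_map_address_t va,
 *       vm_prot_t fault_type,
 *       boolean_t from_user);
 */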
1070 PMAP_SUPPORT_PROTOTYPES(
1071 kern_return_t,
1072 arm_fast_fault, (pmap_t pmap,
1073 vm_map_address_t va,
1074 vm_prot_t fault_type,
1075 boolean_t from_user), ARM_FAST_FAULT_INDEX);
1076
1077
1078 PMAP_SUPPORT_PROTOTYPES(
1079 boolean_t,
1080 arm_force_fast_fault, (ppnum_t ppnum,
1081 vm_prot_t allow_mode,
1082 int options), ARM_FORCE_FAST_FAULT_INDEX);
1083
1084 PMAP_SUPPORT_PROTOTYPES(
1085 kern_return_t,
1086 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
1087
1088 PMAP_SUPPORT_PROTOTYPES(
1089 kern_return_t,
1090 mapping_replenish, (void), MAPPING_REPLENISH_INDEX);
1091
1092 PMAP_SUPPORT_PROTOTYPES(
1093 boolean_t,
1094 pmap_batch_set_cache_attributes, (ppnum_t pn,
1095 unsigned int cacheattr,
1096 unsigned int page_cnt,
1097 unsigned int page_index,
1098 boolean_t doit,
1099 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
1100
1101 PMAP_SUPPORT_PROTOTYPES(
1102 void,
1103 pmap_change_wiring, (pmap_t pmap,
1104 vm_map_address_t v,
1105 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
1106
1107 PMAP_SUPPORT_PROTOTYPES(
1108 pmap_t,
1109 pmap_create, (ledger_t ledger,
1110 vm_map_size_t size,
1111 boolean_t is_64bit), PMAP_CREATE_INDEX);
1112
1113 PMAP_SUPPORT_PROTOTYPES(
1114 void,
1115 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
1116
1117 PMAP_SUPPORT_PROTOTYPES(
1118 kern_return_t,
1119 pmap_enter_options, (pmap_t pmap,
1120 vm_map_address_t v,
1121 ppnum_t pn,
1122 vm_prot_t prot,
1123 vm_prot_t fault_type,
1124 unsigned int flags,
1125 boolean_t wired,
1126 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
1127
1128 PMAP_SUPPORT_PROTOTYPES(
1129 vm_offset_t,
1130 pmap_extract, (pmap_t pmap,
1131 vm_map_address_t va), PMAP_EXTRACT_INDEX);
1132
1133 PMAP_SUPPORT_PROTOTYPES(
1134 ppnum_t,
1135 pmap_find_phys, (pmap_t pmap,
1136 addr64_t va), PMAP_FIND_PHYS_INDEX);
1137
1138 #if (__ARM_VMSA__ > 7)
1139 PMAP_SUPPORT_PROTOTYPES(
1140 kern_return_t,
1141 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
1142 #endif
1143
1144
1145 PMAP_SUPPORT_PROTOTYPES(
1146 boolean_t,
1147 pmap_is_empty, (pmap_t pmap,
1148 vm_map_offset_t va_start,
1149 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
1150
1151
1152 PMAP_SUPPORT_PROTOTYPES(
1153 unsigned int,
1154 pmap_map_cpu_windows_copy, (ppnum_t pn,
1155 vm_prot_t prot,
1156 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
1157
1158 PMAP_SUPPORT_PROTOTYPES(
1159 kern_return_t,
1160 pmap_nest, (pmap_t grand,
1161 pmap_t subord,
1162 addr64_t vstart,
1163 addr64_t nstart,
1164 uint64_t size), PMAP_NEST_INDEX);
1165
1166 PMAP_SUPPORT_PROTOTYPES(
1167 void,
1168 pmap_page_protect_options, (ppnum_t ppnum,
1169 vm_prot_t prot,
1170 unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
1171
1172 PMAP_SUPPORT_PROTOTYPES(
1173 void,
1174 pmap_protect_options, (pmap_t pmap,
1175 vm_map_address_t start,
1176 vm_map_address_t end,
1177 vm_prot_t prot,
1178 unsigned int options,
1179 void *args), PMAP_PROTECT_OPTIONS_INDEX);
1180
1181 PMAP_SUPPORT_PROTOTYPES(
1182 kern_return_t,
1183 pmap_query_page_info, (pmap_t pmap,
1184 vm_map_offset_t va,
1185 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
1186
1187 PMAP_SUPPORT_PROTOTYPES(
1188 mach_vm_size_t,
1189 pmap_query_resident, (pmap_t pmap,
1190 vm_map_address_t start,
1191 vm_map_address_t end,
1192 mach_vm_size_t *compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
1193
1194 PMAP_SUPPORT_PROTOTYPES(
1195 void,
1196 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
1197
1198 PMAP_SUPPORT_PROTOTYPES(
1199 int,
1200 pmap_remove_options, (pmap_t pmap,
1201 vm_map_address_t start,
1202 vm_map_address_t end,
1203 int options), PMAP_REMOVE_OPTIONS_INDEX);
1204
1205 PMAP_SUPPORT_PROTOTYPES(
1206 kern_return_t,
1207 pmap_return, (boolean_t do_panic,
1208 boolean_t do_recurse), PMAP_RETURN_INDEX);
1209
1210 PMAP_SUPPORT_PROTOTYPES(
1211 void,
1212 pmap_set_cache_attributes, (ppnum_t pn,
1213 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
1214
1215 PMAP_SUPPORT_PROTOTYPES(
1216 void,
1217 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
1218
1219 #if MACH_ASSERT
1220 PMAP_SUPPORT_PROTOTYPES(
1221 void,
1222 pmap_set_process, (pmap_t pmap,
1223 int pid,
1224 char *procname), PMAP_SET_PROCESS_INDEX);
1225 #endif
1226
1227 PMAP_SUPPORT_PROTOTYPES(
1228 void,
1229 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
1230
1231 PMAP_SUPPORT_PROTOTYPES(
1232 kern_return_t,
1233 pmap_unnest_options, (pmap_t grand,
1234 addr64_t vaddr,
1235 uint64_t size,
1236 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
1237
1238
1239 PMAP_SUPPORT_PROTOTYPES(
1240 void,
1241 phys_attribute_set, (ppnum_t pn,
1242 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
1243
1244
1245 PMAP_SUPPORT_PROTOTYPES(
1246 void,
1247 phys_attribute_clear, (ppnum_t pn,
1248 unsigned int bits,
1249 int options,
1250 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
1251
1252 PMAP_SUPPORT_PROTOTYPES(
1253 void,
1254 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
1255
1256 PMAP_SUPPORT_PROTOTYPES(
1257 void,
1258 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
1259
1260 PMAP_SUPPORT_PROTOTYPES(
1261 void,
1262 pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
1263
1264
1265 PMAP_SUPPORT_PROTOTYPES(
1266 void,
1267 pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
1268
1269 PMAP_SUPPORT_PROTOTYPES(
1270 void,
1271 pmap_trim, (pmap_t grand,
1272 pmap_t subord,
1273 addr64_t vstart,
1274 addr64_t nstart,
1275 uint64_t size), PMAP_TRIM_INDEX);
1276
1277
1278
1279
1280
1281 void pmap_footprint_suspend(vm_map_t map,
1282 boolean_t suspend);
1283 PMAP_SUPPORT_PROTOTYPES(
1284 void,
1285 pmap_footprint_suspend, (vm_map_t map,
1286 boolean_t suspend),
1287 PMAP_FOOTPRINT_SUSPEND_INDEX);
1288
1289
1290 #if CONFIG_PGTRACE
1291 boolean_t pgtrace_enabled = 0;
1292
1293 typedef struct {
1294 queue_chain_t chain;
1295
1296 /*
1297 pmap - pmap for below addresses
1298 ova - original va page address
1299 cva - clone va addresses for pre, target and post pages
1300 cva_spte - clone saved ptes
1301 range - trace range in this map
1302 cloned - has been cloned or not
1303 */
1304 pmap_t pmap;
1305 vm_map_offset_t ova;
1306 vm_map_offset_t cva[3];
1307 pt_entry_t cva_spte[3];
1308 struct {
1309 pmap_paddr_t start;
1310 pmap_paddr_t end;
1311 } range;
1312 bool cloned;
1313 } pmap_pgtrace_map_t;
1314
1315 static void pmap_pgtrace_init(void);
1316 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
1317 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
1318 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
1319 #endif
1320
1321 #if (__ARM_VMSA__ > 7)
1322 /*
1323 * The low global vector page is mapped at a fixed alias.
1324 * Since the page size is 16k for H8 and newer, we map the globals to a
1325 * 16k-aligned address. Readers of the globals (e.g. lldb, panic server)
1326 * need to check both addresses anyway for backward compatibility, so for
1327 * now we leave H6 and H7 where they were.
1328 */
1329 #if (ARM_PGSHIFT == 14)
1330 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
1331 #else
1332 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
1333 #endif
1334
1335 #else
1336 #define LOWGLOBAL_ALIAS (0xFFFF1000)
1337 #endif
1338
1339 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1340 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1341 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1342 long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
1343
1344 int pt_fake_zone_index = -1; /* index of pmap fake zone */
1345
1346
1347
1348 /*
1349 * Allocates and initializes a per-CPU data structure for the pmap.
1350 */
1351 MARK_AS_PMAP_TEXT static void
1352 pmap_cpu_data_init_internal(unsigned int cpu_number)
1353 {
1354 pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
1355
1356 pmap_cpu_data->cpu_number = cpu_number;
1357 }
1358
1359 void
1360 pmap_cpu_data_init(void)
1361 {
1362 pmap_cpu_data_init_internal(cpu_number());
1363 }
1364
1365 static void
1366 pmap_cpu_data_array_init(void)
1367 {
1368
1369 pmap_cpu_data_init();
1370 }
1371
1372 pmap_cpu_data_t *
1373 pmap_get_cpu_data(void)
1374 {
1375 pmap_cpu_data_t * pmap_cpu_data = NULL;
1376
1377 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
1378
1379 return pmap_cpu_data;
1380 }
1381
1382
1383 /* TODO */
1384 pmap_paddr_t
1385 pmap_pages_reclaim(
1386 void)
1387 {
1388 boolean_t found_page;
1389 unsigned i;
1390 pt_desc_t *ptdp;
1391
1392
1393 /*
1394 * pmap_pages_reclaim() returns a page by freeing an active pt page.
1395 * To be eligible, a pt page must belong to a user pmap, contain no wired pte
1396 * entries, and contain at least one valid pte entry.
1397 *
1398 * In a loop, check for a page in the reclaimed pt page list.
1399 * If one is present, unlink that page and return its physical address.
1400 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
1401 * If one is found, invoke pmap_remove_range() on its pmap and address range, then
1402 * deallocate that pt page. This ends up adding the pt page to the
1403 * reclaimed pt page list.
1404 * If no eligible page is found in the pt page list, panic.
1405 */
1406
1407 pmap_simple_lock(&pmap_pages_lock);
1408 pmap_pages_request_count++;
1409 pmap_pages_request_acum++;
1410
1411 while (1) {
1412
1413 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
1414 page_free_entry_t *page_entry;
1415
1416 page_entry = pmap_pages_reclaim_list;
1417 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
1418 pmap_simple_unlock(&pmap_pages_lock);
1419
1420 return((pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry));
1421 }
1422
1423 pmap_simple_unlock(&pmap_pages_lock);
1424
1425 pmap_simple_lock(&pt_pages_lock);
1426 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
1427 found_page = FALSE;
1428
1429 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
1430 if ((ptdp->pmap->nested == FALSE)
1431 && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
1432
1433 assert(ptdp->pmap != kernel_pmap);
1434 unsigned refcnt_acc = 0;
1435 unsigned wiredcnt_acc = 0;
1436
1437 for (i = 0 ; i < PT_INDEX_MAX ; i++) {
1438 if (ptdp->pt_cnt[i].refcnt == PT_DESC_REFCOUNT) {
1439 /* Do not attempt to free a page that contains an L2 table */
1440 refcnt_acc = 0;
1441 break;
1442 }
1443 refcnt_acc += ptdp->pt_cnt[i].refcnt;
1444 wiredcnt_acc += ptdp->pt_cnt[i].wiredcnt;
1445 }
1446 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
1447 found_page = TRUE;
1448 /* Leave ptdp->pmap locked here. We're about to reclaim
1449 * a tt page from it, so we don't want anyone else messing
1450 * with it while we do that. */
1451 break;
1452 }
1453 pmap_simple_unlock(&ptdp->pmap->lock);
1454 }
1455 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
1456 }
1457 if (!found_page) {
1458 panic("pmap_pages_reclaim(): No eligible page in pt_page_list\n");
1459 } else {
1460 int remove_count = 0;
1461 vm_map_address_t va;
1462 pmap_t pmap;
1463 pt_entry_t *bpte, *epte;
1464 pt_entry_t *pte_p;
1465 tt_entry_t *tte_p;
1466 uint32_t rmv_spte=0;
1467
1468 pmap_simple_unlock(&pt_pages_lock);
1469 pmap = ptdp->pmap;
1470 PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
1471 for (i = 0 ; i < PT_INDEX_MAX ; i++) {
1472 va = ptdp->pt_map[i].va;
1473
1474 /* If the VA is bogus, this may represent an unallocated region
1475 * or one which is in transition (already being freed or expanded).
1476 * Don't try to remove mappings here. */
1477 if (va == (vm_offset_t)-1)
1478 continue;
1479
1480 tte_p = pmap_tte(pmap, va);
1481 if ((tte_p != (tt_entry_t *) NULL)
1482 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
1483
1484 #if (__ARM_VMSA__ == 7)
1485 pte_p = (pt_entry_t *) ttetokv(*tte_p);
1486 bpte = &pte_p[ptenum(va)];
1487 epte = bpte + PAGE_SIZE/sizeof(pt_entry_t);
1488 #else
1489 pte_p = (pt_entry_t *) ttetokv(*tte_p);
1490 bpte = &pte_p[tt3_index(pmap, va)];
1491 epte = bpte + PAGE_SIZE/sizeof(pt_entry_t);
1492 #endif
1493 /*
1494 * Use PMAP_OPTIONS_REMOVE to clear any
1495 * "compressed" markers and update the
1496 * "compressed" counter in pmap->stats.
1497 * This means that we lose accounting for
1498 * any compressed pages in this range
1499 * but the alternative is to not be able
1500 * to account for their future decompression,
1501 * which could cause the counter to drift
1502 * more and more.
1503 */
1504 remove_count += pmap_remove_range_options(
1505 pmap, va, bpte, epte,
1506 &rmv_spte, PMAP_OPTIONS_REMOVE);
1507 if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0)
1508 panic("pmap_pages_reclaim(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
1509 #if (__ARM_VMSA__ == 7)
1510 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
1511 flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->asid & 0xff));
1512 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->asid & 0xff));
1513 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2*ARM_TT_L1_SIZE)| (pmap->asid & 0xff));
1514 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3*ARM_TT_L1_SIZE)| (pmap->asid & 0xff));
1515 #else
1516 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
1517 flush_mmu_tlb_entry_async(tlbi_addr(va & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
1518 #endif
1519
1520 if (remove_count > 0) {
1521 #if (__ARM_VMSA__ == 7)
1522 flush_mmu_tlb_region_asid_async(va, 4*ARM_TT_L1_SIZE, pmap);
1523 #else
1524 flush_mmu_tlb_region_asid_async(va, ARM_TT_L2_SIZE, pmap);
1525 #endif
1526 }
1527 }
1528 }
1529 sync_tlb_flush();
1530 // Undo the lock we grabbed when we found ptdp above
1531 PMAP_UNLOCK(pmap);
1532 }
1533 pmap_simple_lock(&pmap_pages_lock);
1534 }
1535 }
1536
1537
1538 static kern_return_t
1539 pmap_pages_alloc(
1540 pmap_paddr_t *pa,
1541 unsigned size,
1542 unsigned option)
1543 {
1544 vm_page_t m = VM_PAGE_NULL, m_prev;
1545
1546 if(option & PMAP_PAGES_RECLAIM_NOWAIT) {
1547 assert(size == PAGE_SIZE);
1548 *pa = pmap_pages_reclaim();
1549 return KERN_SUCCESS;
1550 }
1551 if (size == PAGE_SIZE) {
1552 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
1553 if(option & PMAP_PAGES_ALLOCATE_NOWAIT) {
1554 return KERN_RESOURCE_SHORTAGE;
1555 }
1556
1557 VM_PAGE_WAIT();
1558 }
1559 vm_page_lock_queues();
1560 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
1561 vm_page_unlock_queues();
1562 }
1563 if (size == 2*PAGE_SIZE) {
1564 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
1565 if(option & PMAP_PAGES_ALLOCATE_NOWAIT)
1566 return KERN_RESOURCE_SHORTAGE;
1567
1568 VM_PAGE_WAIT();
1569 }
1570 }
1571
1572 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
1573
1574 vm_object_lock(pmap_object);
1575 while (m != VM_PAGE_NULL) {
1576 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
1577 m_prev = m;
1578 m = NEXT_PAGE(m_prev);
1579 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
1580 }
1581 vm_object_unlock(pmap_object);
1582
1583 OSAddAtomic(size>>PAGE_SHIFT, &inuse_pmap_pages_count);
1584 OSAddAtomic64(size>>PAGE_SHIFT, &alloc_pmap_pages_count);
1585
1586 return KERN_SUCCESS;
1587 }
1588
1589
1590 static void
1591 pmap_pages_free(
1592 pmap_paddr_t pa,
1593 unsigned size)
1594 {
1595 pmap_simple_lock(&pmap_pages_lock);
1596
1597 if (pmap_pages_request_count != 0) {
1598 page_free_entry_t *page_entry;
1599
1600 pmap_pages_request_count--;
1601 page_entry = (page_free_entry_t *)phystokv(pa);
1602 page_entry->next = pmap_pages_reclaim_list;
1603 pmap_pages_reclaim_list = page_entry;
1604 pmap_simple_unlock(&pmap_pages_lock);
1605
1606 return;
1607 }
1608
1609 pmap_simple_unlock(&pmap_pages_lock);
1610
1611 vm_page_t m;
1612 pmap_paddr_t pa_max;
1613
1614 OSAddAtomic(-(size>>PAGE_SHIFT), &inuse_pmap_pages_count);
1615
1616 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
1617 vm_object_lock(pmap_object);
1618 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
1619 assert(m != VM_PAGE_NULL);
1620 assert(VM_PAGE_WIRED(m));
1621 vm_page_lock_queues();
1622 vm_page_free(m);
1623 vm_page_unlock_queues();
1624 vm_object_unlock(pmap_object);
1625 }
1626 }
1627
1628 static inline void
1629 PMAP_ZINFO_PALLOC(
1630 pmap_t pmap, int bytes)
1631 {
1632 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
1633 }
1634
1635 static inline void
1636 PMAP_ZINFO_PFREE(
1637 pmap_t pmap,
1638 int bytes)
1639 {
1640 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
1641 }
1642
1643 static inline void
1644 pmap_tt_ledger_credit(
1645 pmap_t pmap,
1646 vm_size_t size)
1647 {
1648 if (pmap != kernel_pmap) {
1649 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
1650 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
1651 }
1652 }
1653
1654 static inline void
1655 pmap_tt_ledger_debit(
1656 pmap_t pmap,
1657 vm_size_t size)
1658 {
1659 if (pmap != kernel_pmap) {
1660 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
1661 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
1662 }
1663 }
1664
1665 static unsigned int
1666 alloc_asid(
1667 void)
1668 {
1669 unsigned int asid_bitmap_index;
1670
1671 pmap_simple_lock(&pmaps_lock);
1672 for (asid_bitmap_index = 0; asid_bitmap_index < (MAX_ASID / (sizeof(uint32_t) * NBBY)); asid_bitmap_index++) {
1673 unsigned int temp = ffs(asid_bitmap[asid_bitmap_index]);
1674 if (temp > 0) {
1675 temp -= 1;
1676 asid_bitmap[asid_bitmap_index] &= ~(1 << temp);
1677 #if __ARM_KERNEL_PROTECT__
1678 /*
1679 * We need two ASIDs: n and (n | 1). n is used for EL0,
1680 * (n | 1) for EL1.
1681 */
1682 unsigned int temp2 = temp | 1;
1683 assert(temp2 < MAX_ASID);
1684 assert(temp2 < 32);
1685 assert(temp2 != temp);
1686 assert(asid_bitmap[asid_bitmap_index] & (1 << temp2));
1687
1688 /* Grab the second ASID. */
1689 asid_bitmap[asid_bitmap_index] &= ~(1 << temp2);
1690 #endif /* __ARM_KERNEL_PROTECT__ */
1691 pmap_simple_unlock(&pmaps_lock);
1692
1693 /*
1694 * We should never vend out physical ASID 0 through this
1695 * method, as it belongs to the kernel.
1696 */
1697 assert(((asid_bitmap_index * sizeof(uint32_t) * NBBY + temp) % ARM_MAX_ASID) != 0);
1698
1699 #if __ARM_KERNEL_PROTECT__
1700 /* Or the kernel EL1 ASID. */
1701 assert(((asid_bitmap_index * sizeof(uint32_t) * NBBY + temp) % ARM_MAX_ASID) != 1);
1702 #endif /* __ARM_KERNEL_PROTECT__ */
1703
1704 return (asid_bitmap_index * sizeof(uint32_t) * NBBY + temp);
1705 }
1706 }
1707 pmap_simple_unlock(&pmaps_lock);
1708 /*
1709 * TODO: Add code to deal with a pmap that has no ASID; panic for now. Not
1710 * an issue given the small-config process hard limit.
1711 */
1712 panic("alloc_asid(): out of ASID number");
1713 return MAX_ASID;
1714 }
1715
1716 static void
1717 free_asid(
1718 int asid)
1719 {
1720 /* Don't free up any alias of physical ASID 0. */
1721 assert((asid % ARM_MAX_ASID) != 0);
1722
1723 pmap_simple_lock(&pmaps_lock);
1724 setbit(asid, (int *) asid_bitmap);
1725
1726 #if __ARM_KERNEL_PROTECT__
1727 assert((asid | 1) < MAX_ASID);
1728 assert((asid | 1) != asid);
1729 setbit(asid | 1, (int *) asid_bitmap);
1730 #endif /* __ARM_KERNEL_PROTECT__ */
1731
1732 pmap_simple_unlock(&pmaps_lock);
1733 }
1734
1735 #ifndef PMAP_PV_LOAD_FACTOR
1736 #define PMAP_PV_LOAD_FACTOR 1
1737 #endif
1738
1739 #define PV_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
1740 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
1741 #define PV_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
1742 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
1743 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
1744 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
1745
1746 uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
1747 uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
1748 uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;
1749
1750 uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
1751 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
1752 uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
1753 uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;
1754
1755 thread_t mapping_replenish_thread;
1756 event_t mapping_replenish_event;
1757 event_t pmap_user_pv_throttle_event;
1758 volatile uint32_t mappingrecurse = 0;
1759
1760 uint64_t pmap_pv_throttle_stat;
1761 uint64_t pmap_pv_throttled_waiters;
1762
1763 unsigned pmap_mapping_thread_wakeups;
1764 unsigned pmap_kernel_reserve_replenish_stat MARK_AS_PMAP_DATA;
1765 unsigned pmap_user_reserve_replenish_stat MARK_AS_PMAP_DATA;
1766 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
1767
1768
1769 static void
1770 pv_init(
1771 void)
1772 {
1773 simple_lock_init(&pv_free_list_lock, 0);
1774 simple_lock_init(&pv_kern_free_list_lock, 0);
1775 pv_free_list = PV_ENTRY_NULL;
1776 pv_free_count = 0x0U;
1777 pv_kern_free_list = PV_ENTRY_NULL;
1778 pv_kern_free_count = 0x0U;
1779 }
1780
1781 static inline void PV_ALLOC(pv_entry_t **pv_ep);
1782 static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
1783 static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
1784 static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
1785
1786 static inline void pmap_pv_throttle(pmap_t p);
1787
1788 static boolean_t
1789 pv_alloc(
1790 pmap_t pmap,
1791 unsigned int pai,
1792 pv_entry_t **pvepp)
1793 {
1794 if (pmap != NULL)
1795 PMAP_ASSERT_LOCKED(pmap);
1796 ASSERT_PVH_LOCKED(pai);
1797 PV_ALLOC(pvepp);
1798 if (PV_ENTRY_NULL == *pvepp) {
1799
1800 if ((pmap == NULL) || (kernel_pmap == pmap)) {
1801
1802 PV_KERN_ALLOC(pvepp);
1803
1804 if (PV_ENTRY_NULL == *pvepp) {
1805 pv_entry_t *pv_e;
1806 pv_entry_t *pv_eh;
1807 pv_entry_t *pv_et;
1808 int pv_cnt;
1809 unsigned j;
1810 pmap_paddr_t pa;
1811 kern_return_t ret;
1812
1813 UNLOCK_PVH(pai);
1814 if (pmap != NULL)
1815 PMAP_UNLOCK(pmap);
1816
1817 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
1818
1819 if (ret == KERN_RESOURCE_SHORTAGE) {
1820 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
1821 }
1822
1823 if (ret != KERN_SUCCESS) {
1824 panic("%s: failed to alloc page for kernel, ret=%d, "
1825 "pmap=%p, pai=%u, pvepp=%p",
1826 __FUNCTION__, ret,
1827 pmap, pai, pvepp);
1828 }
1829
1830 pv_page_count++;
1831
1832 pv_e = (pv_entry_t *)phystokv(pa);
1833 pv_cnt = 0;
1834 pv_eh = pv_et = PV_ENTRY_NULL;
1835 *pvepp = pv_e;
1836 pv_e++;
1837
1838 for (j = 1; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
1839 pv_e->pve_next = pv_eh;
1840 pv_eh = pv_e;
1841
1842 if (pv_et == PV_ENTRY_NULL)
1843 pv_et = pv_e;
1844 pv_cnt++;
1845 pv_e++;
1846 }
1847 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
1848 if (pmap != NULL)
1849 PMAP_LOCK(pmap);
1850 LOCK_PVH(pai);
1851 return FALSE;
1852 }
1853 } else {
1854 UNLOCK_PVH(pai);
1855 PMAP_UNLOCK(pmap);
1856 pmap_pv_throttle(pmap);
1857 {
1858 pv_entry_t *pv_e;
1859 pv_entry_t *pv_eh;
1860 pv_entry_t *pv_et;
1861 int pv_cnt;
1862 unsigned j;
1863 pmap_paddr_t pa;
1864 kern_return_t ret;
1865
1866 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
1867
1868 if (ret != KERN_SUCCESS) {
1869 panic("%s: failed to alloc page, ret=%d, "
1870 "pmap=%p, pai=%u, pvepp=%p",
1871 __FUNCTION__, ret,
1872 pmap, pai, pvepp);
1873 }
1874
1875 pv_page_count++;
1876
1877 pv_e = (pv_entry_t *)phystokv(pa);
1878 pv_cnt = 0;
1879 pv_eh = pv_et = PV_ENTRY_NULL;
1880 *pvepp = pv_e;
1881 pv_e++;
1882
1883 for (j = 1; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
1884 pv_e->pve_next = pv_eh;
1885 pv_eh = pv_e;
1886
1887 if (pv_et == PV_ENTRY_NULL)
1888 pv_et = pv_e;
1889 pv_cnt++;
1890 pv_e++;
1891 }
1892 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
1893 }
1894 PMAP_LOCK(pmap);
1895 LOCK_PVH(pai);
1896 return FALSE;
1897 }
1898 }
1899 assert(PV_ENTRY_NULL != *pvepp);
1900 return TRUE;
1901 }
1902
1903 static void
1904 pv_free(
1905 pv_entry_t *pvep)
1906 {
1907 PV_FREE_LIST(pvep, pvep, 1);
1908 }
1909
1910 static void
1911 pv_list_free(
1912 pv_entry_t *pvehp,
1913 pv_entry_t *pvetp,
1914 unsigned int cnt)
1915 {
1916 PV_FREE_LIST(pvehp, pvetp, cnt);
1917 }
1918
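/*
 * Wake the mapping_replenish thread if either PV free list has fallen below
 * its low-water mark; mappingrecurse guards against redundant wakeups.
 */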
1919 static inline void
1920 pv_water_mark_check(void)
1921 {
1922 if ((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark)) {
1923 if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
1924 thread_wakeup(&mapping_replenish_event);
1925 }
1926 }
1927
1928 static inline void PV_ALLOC(pv_entry_t **pv_ep) {
1929 assert(*pv_ep == PV_ENTRY_NULL);
1930 pmap_simple_lock(&pv_free_list_lock);
1931 /*
1932 * If the kernel reserved pool is low, force non-kernel mappings to allocate
1933 * synchronously, possibly subject to a throttle.
1934 */
1935 if ((pv_kern_free_count >= pv_kern_low_water_mark) && ((*pv_ep = pv_free_list) != 0)) {
1936 pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
1937 (*pv_ep)->pve_next = PV_ENTRY_NULL;
1938 pv_free_count--;
1939 }
1940
1941 pmap_simple_unlock(&pv_free_list_lock);
1942 }
1943
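/* Prepend the chain of pv_cnt entries linked from pv_eh to pv_et onto the general PV free list. */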
1944 static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt) {
1945 pmap_simple_lock(&pv_free_list_lock);
1946 pv_et->pve_next = (pv_entry_t *)pv_free_list;
1947 pv_free_list = pv_eh;
1948 pv_free_count += pv_cnt;
1949 pmap_simple_unlock(&pv_free_list_lock);
1950 }
1951
1952 static inline void PV_KERN_ALLOC(pv_entry_t **pv_e) {
1953 assert(*pv_e == PV_ENTRY_NULL);
1954 pmap_simple_lock(&pv_kern_free_list_lock);
1955
1956 if ((*pv_e = pv_kern_free_list) != 0) {
1957 pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
1958 (*pv_e)->pve_next = PV_ENTRY_NULL;
1959 pv_kern_free_count--;
1960 pmap_kern_reserve_alloc_stat++;
1961 }
1962
1963 pmap_simple_unlock(&pv_kern_free_list_lock);
1964 }
1965
1966 static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt) {
1967 pmap_simple_lock(&pv_kern_free_list_lock);
1968 pv_et->pve_next = pv_kern_free_list;
1969 pv_kern_free_list = pv_eh;
1970 pv_kern_free_count += pv_cnt;
1971 pmap_simple_unlock(&pv_kern_free_list_lock);
1972 }
1973
1974 static inline void pmap_pv_throttle(__unused pmap_t p) {
1975 assert(p != kernel_pmap);
1976 /* Apply throttle on non-kernel mappings */
1977 if (pv_kern_free_count < (pv_kern_low_water_mark / 2)) {
1978 pmap_pv_throttle_stat++;
1979 /* This doesn't need to be strictly accurate, merely a hint
1980 * to eliminate the timeout when the reserve is replenished.
1981 */
1982 pmap_pv_throttled_waiters++;
1983 assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
1984 thread_block(THREAD_CONTINUE_NULL);
1985 }
1986 }
1987
1988 /*
1989 * Creates a target number of free pv_entry_t objects for the kernel free list
1990 * and the general free list.
1991 */
1992 MARK_AS_PMAP_TEXT static kern_return_t
1993 mapping_free_prime_internal(void)
1994 {
1995 unsigned j;
1996 pmap_paddr_t pa;
1997 kern_return_t ret;
1998 pv_entry_t *pv_e;
1999 pv_entry_t *pv_eh;
2000 pv_entry_t *pv_et;
2001 int pv_cnt;
2002 int alloc_options = 0;
2003 int needed_pv_cnt = 0;
2004 int target_pv_free_cnt = 0;
2005
2006 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
2007 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;
2008
2009 if (mapping_free_prime_internal_done) {
2010 return KERN_FAILURE;
2011 }
2012
2013 if (!mapping_free_prime_internal_called) {
2014 mapping_free_prime_internal_called = TRUE;
2015
2016 pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;
2017
2018 /* Alterable via sysctl */
2019 pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;
2020
2021 pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
2022 pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
2023 }
2024
2025 pv_cnt = 0;
2026 pv_eh = pv_et = PV_ENTRY_NULL;
2027 target_pv_free_cnt = PV_ALLOC_INITIAL_TARGET;
2028
2029 /*
2030 * We don't take the lock to read pv_free_count, as we should not be
2031 * invoking this from a multithreaded context.
2032 */
2033 needed_pv_cnt = target_pv_free_cnt - pv_free_count;
2034
2035 if (needed_pv_cnt > target_pv_free_cnt) {
2036 needed_pv_cnt = 0;
2037 }
2038
2039 while (pv_cnt < needed_pv_cnt) {
2040 ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
2041
2042 assert(ret == KERN_SUCCESS);
2043
2044 pv_page_count++;
2045
2046 pv_e = (pv_entry_t *)phystokv(pa);
2047
2048 for (j = 0; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
2049 pv_e->pve_next = pv_eh;
2050 pv_eh = pv_e;
2051
2052 if (pv_et == PV_ENTRY_NULL)
2053 pv_et = pv_e;
2054 pv_cnt++;
2055 pv_e++;
2056 }
2057 }
2058
2059 if (pv_cnt) {
2060 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2061 }
2062
2063 pv_cnt = 0;
2064 pv_eh = pv_et = PV_ENTRY_NULL;
2065 target_pv_free_cnt = PV_KERN_ALLOC_INITIAL_TARGET;
2066
2067 /*
2068 * We don't take the lock to read pv_kern_free_count, as we should not
2069 * be invoking this from a multithreaded context.
2070 */
2071 needed_pv_cnt = target_pv_free_cnt - pv_kern_free_count;
2072
2073 if (needed_pv_cnt > target_pv_free_cnt) {
2074 needed_pv_cnt = 0;
2075 }
2076
2077 while (pv_cnt < needed_pv_cnt) {
2078
2079 ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
2080
2081 assert(ret == KERN_SUCCESS);
2082 pv_page_count++;
2083
2084 pv_e = (pv_entry_t *)phystokv(pa);
2085
2086 for (j = 0; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
2087 pv_e->pve_next = pv_eh;
2088 pv_eh = pv_e;
2089
2090 if (pv_et == PV_ENTRY_NULL)
2091 pv_et = pv_e;
2092 pv_cnt++;
2093 pv_e++;
2094 }
2095 }
2096
2097 if (pv_cnt) {
2098 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2099 }
2100
2101 mapping_free_prime_internal_done = TRUE;
2102 return KERN_SUCCESS;
2103 }
2104
2105 void
2106 mapping_free_prime(void)
2107 {
2108 kern_return_t kr = KERN_FAILURE;
2109
2110 kr = mapping_free_prime_internal();
2111
2112 if (kr != KERN_SUCCESS) {
2113 panic("%s: failed, kr=%d", __FUNCTION__, kr);
2114 }
2115 }
2116
2117 void mapping_replenish(void);
2118
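/*
 * Create and start the mapping_replenish thread that keeps the PV free
 * lists replenished.
 */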
2119 void mapping_adjust(void) {
2120 kern_return_t mres;
2121
2122 mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
2123 if (mres != KERN_SUCCESS) {
2124 panic("pmap: mapping_replenish thread creation failed");
2125 }
2126 thread_deallocate(mapping_replenish_thread);
2127 }
2128
2129 /*
2130 * Fills the kernel and general PV free lists back up to their low watermarks.
2131 */
2132 MARK_AS_PMAP_TEXT static kern_return_t
2133 mapping_replenish_internal(void)
2134 {
2135 pv_entry_t *pv_e;
2136 pv_entry_t *pv_eh;
2137 pv_entry_t *pv_et;
2138 int pv_cnt;
2139 unsigned j;
2140 pmap_paddr_t pa;
2141 kern_return_t ret = KERN_SUCCESS;
2142
2143 while (pv_kern_free_count < pv_kern_low_water_mark) {
2144 pv_cnt = 0;
2145 pv_eh = pv_et = PV_ENTRY_NULL;
2146
2147 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2148 assert(ret == KERN_SUCCESS);
2149
2150 pv_page_count++;
2151
2152 pv_e = (pv_entry_t *)phystokv(pa);
2153
2154 for (j = 0; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
2155 pv_e->pve_next = pv_eh;
2156 pv_eh = pv_e;
2157
2158 if (pv_et == PV_ENTRY_NULL)
2159 pv_et = pv_e;
2160 pv_cnt++;
2161 pv_e++;
2162 }
2163 pmap_kernel_reserve_replenish_stat += pv_cnt;
2164 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2165 }
2166
2167 while (pv_free_count < pv_low_water_mark) {
2168 pv_cnt = 0;
2169 pv_eh = pv_et = PV_ENTRY_NULL;
2170
2171 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2172 assert(ret == KERN_SUCCESS);
2173
2174 pv_page_count++;
2175
2176 pv_e = (pv_entry_t *)phystokv(pa);
2177
2178 for (j = 0; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
2179 pv_e->pve_next = pv_eh;
2180 pv_eh = pv_e;
2181
2182 if (pv_et == PV_ENTRY_NULL)
2183 pv_et = pv_e;
2184 pv_cnt++;
2185 pv_e++;
2186 }
2187 pmap_user_reserve_replenish_stat += pv_cnt;
2188 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2189 }
2190
2191 return ret;
2192 }
2193
2194 /*
2195 * Continuation function that keeps the PV free lists from running out of free
2196 * elements.
2197 */
2198 __attribute__((noreturn))
2199 void
2200 mapping_replenish(void)
2201 {
2202 kern_return_t kr;
2203
2204 /* We qualify for VM privileges...*/
2205 current_thread()->options |= TH_OPT_VMPRIV;
2206
2207 for (;;) {
2208 kr = mapping_replenish_internal();
2209
2210 if (kr != KERN_SUCCESS) {
2211 panic("%s: failed, kr=%d", __FUNCTION__, kr);
2212 }
2213
2214 /*
2215 * Wake threads throttled while the kernel reserve was being replenished.
2216 */
2217 if (pmap_pv_throttled_waiters) {
2218 pmap_pv_throttled_waiters = 0;
2219 thread_wakeup(&pmap_user_pv_throttle_event);
2220 }
2221
2222 /* Check if the kernel pool has been depleted since the
2223 * first pass, to reduce refill latency.
2224 */
2225 if (pv_kern_free_count < pv_kern_low_water_mark)
2226 continue;
2227 /* Block sans continuation to avoid yielding kernel stack */
2228 assert_wait(&mapping_replenish_event, THREAD_UNINT);
2229 mappingrecurse = 0;
2230 thread_block(THREAD_CONTINUE_NULL);
2231 pmap_mapping_thread_wakeups++;
2232 }
2233 }
2234
2235
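/*
 * Seed the page table descriptor (pt_desc_t) free list from the bootstrap
 * array carved out in pmap_bootstrap().
 */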
2236 static void
2237 ptd_bootstrap(
2238 pt_desc_t *ptdp,
2239 unsigned int ptd_cnt)
2240 {
2241 simple_lock_init(&ptd_free_list_lock, 0);
2242 while (ptd_cnt != 0) {
2243 (*(void **)ptdp) = (void *)ptd_free_list;
2244 ptd_free_list = ptdp;
2245 ptdp++;
2246 ptd_cnt--;
2247 ptd_free_count++;
2248 }
2249 ptd_preboot = FALSE;
2250 }
2251
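/*
 * Pop a page table descriptor from the free list, refilling the list with a
 * page's worth of descriptors when it is empty (carved from avail_start
 * before the VM is up, from pmap_pages_alloc() afterwards).  The descriptor
 * is returned unlinked, i.e. not yet entered onto pt_page_list.
 */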
2252 static pt_desc_t
2253 *ptd_alloc_unlinked(void)
2254 {
2255 pt_desc_t *ptdp;
2256 unsigned i;
2257
2258 if (!ptd_preboot)
2259 pmap_simple_lock(&ptd_free_list_lock);
2260
2261 if (ptd_free_count == 0) {
2262 unsigned int ptd_cnt;
2263 pt_desc_t *ptdp_next;
2264
2265 if (ptd_preboot) {
2266 ptdp = (pt_desc_t *)avail_start;
2267 avail_start += ARM_PGBYTES;
2268 ptdp_next = ptdp;
2269 ptd_cnt = ARM_PGBYTES/sizeof(pt_desc_t);
2270 } else {
2271 pmap_paddr_t pa;
2272 kern_return_t ret;
2273
2274 pmap_simple_unlock(&ptd_free_list_lock);
2275
2276 if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
2277 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
2278 assert(ret == KERN_SUCCESS);
2279 }
2280 ptdp = (pt_desc_t *)phystokv(pa);
2281
2282 pmap_simple_lock(&ptd_free_list_lock);
2283 ptdp_next = ptdp;
2284 ptd_cnt = PAGE_SIZE/sizeof(pt_desc_t);
2285 }
2286
2287 while (ptd_cnt != 0) {
2288 (*(void **)ptdp_next) = (void *)ptd_free_list;
2289 ptd_free_list = ptdp_next;
2290 ptdp_next++;
2291 ptd_cnt--;
2292 ptd_free_count++;
2293 }
2294 }
2295
2296 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
2297 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
2298 ptd_free_count--;
2299 } else {
2300 panic("out of ptd entries\n");
2301 }
2302
2303 if (!ptd_preboot)
2304 pmap_simple_unlock(&ptd_free_list_lock);
2305
2306 ptdp->pt_page.next = NULL;
2307 ptdp->pt_page.prev = NULL;
2308 ptdp->pmap = NULL;
2309
2310 for (i = 0 ; i < PT_INDEX_MAX ; i++) {
2311 ptdp->pt_map[i].va = (vm_offset_t)-1;
2312 ptdp->pt_cnt[i].refcnt = 0;
2313 ptdp->pt_cnt[i].wiredcnt = 0;
2314 }
2315
2316 return(ptdp);
2317 }
2318
2319 static inline pt_desc_t*
2320 ptd_alloc(pmap_t pmap)
2321 {
2322 pt_desc_t *ptdp = ptd_alloc_unlinked();
2323
2324 ptdp->pmap = pmap;
2325 if (pmap != kernel_pmap) {
2326 /* We should never try to reclaim kernel pagetable pages in
2327 * pmap_pages_reclaim(), so don't enter them into the list. */
2328 pmap_simple_lock(&pt_pages_lock);
2329 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2330 pmap_simple_unlock(&pt_pages_lock);
2331 }
2332
2333 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
2334 return ptdp;
2335 }
2336
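/*
 * Return a page table descriptor to the free list, removing it from
 * pt_page_list and debiting the owning pmap's page table ledger (if any).
 */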
2337 static void
2338 ptd_deallocate(pt_desc_t *ptdp)
2339 {
2340 pmap_t pmap = ptdp->pmap;
2341
2342 if (ptd_preboot) {
2343 panic("ptd_deallocate(): early boot\n");
2344 }
2345
2346 if (ptdp->pt_page.next != NULL) {
2347 pmap_simple_lock(&pt_pages_lock);
2348 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2349 pmap_simple_unlock(&pt_pages_lock);
2350 }
2351 pmap_simple_lock(&ptd_free_list_lock);
2352 (*(void **)ptdp) = (void *)ptd_free_list;
2353 ptd_free_list = (pt_desc_t *)ptdp;
2354 ptd_free_count++;
2355 pmap_simple_unlock(&ptd_free_list_lock);
2356 if (pmap != NULL)
2357 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
2358 }
2359
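/*
 * Record the base virtual address covered by a newly allocated page table in
 * its descriptor; non-leaf tables also have their refcnt primed to
 * PT_DESC_REFCOUNT.
 */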
2360 static void
2361 ptd_init(
2362 pt_desc_t *ptdp,
2363 pmap_t pmap,
2364 vm_map_address_t va,
2365 unsigned int level,
2366 pt_entry_t *pte_p)
2367 {
2368 if (ptdp->pmap != pmap)
2369 panic("ptd_init(): pmap mismatch\n");
2370
2371 #if (__ARM_VMSA__ == 7)
2372 assert(level == 2);
2373 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
2374 #else
2375 if (level == 3)
2376 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L2_OFFMASK;
2377 else if (level == 2)
2378 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L1_OFFMASK;
2379 #endif
2380 if (level < PMAP_TT_MAX_LEVEL)
2381 ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
2382 }
2383
2384
2385 boolean_t
2386 pmap_valid_address(
2387 pmap_paddr_t addr)
2388 {
2389 return pa_valid(addr);
2390 }
2391
2392 #if (__ARM_VMSA__ == 7)
2393
2394 /*
2395 * Given an offset and a map, compute the address of the
2396 * corresponding translation table entry.
2397 */
2398 static inline tt_entry_t *
2399 pmap_tte(pmap_t pmap,
2400 vm_map_address_t addr)
2401 {
2402 if (!(tte_index(pmap, addr) < pmap->tte_index_max))
2403 return (tt_entry_t *)NULL;
2404 return (&pmap->tte[tte_index(pmap, addr)]);
2405 }
2406
2407
2408 /*
2409 * Given an offset and a map, compute the address of the
2410 * pte. If the address is invalid with respect to the map
2411 * then PT_ENTRY_NULL is returned (and the map may need to grow).
2412 *
2413 * This is only used internally.
2414 */
2415 static inline pt_entry_t *
2416 pmap_pte(
2417 pmap_t pmap,
2418 vm_map_address_t addr)
2419 {
2420 pt_entry_t *ptp;
2421 tt_entry_t *ttp;
2422 tt_entry_t tte;
2423
2424 ttp = pmap_tte(pmap, addr);
2425 if (ttp == (tt_entry_t *)NULL)
2426 return (PT_ENTRY_NULL);
2427 tte = *ttp;
2428 #if MACH_ASSERT
2429 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK)
2430 panic("Attempt to demote L1 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2431 #endif
2432 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)
2433 return (PT_ENTRY_NULL);
2434 ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
2435 return (ptp);
2436 }
2437
2438 #else
2439
2440 /*
2441 * Given an offset and a map, compute the address of the level 1 translation table entry.
2442 * If the translation is invalid then PT_ENTRY_NULL is returned.
2443 */
2444 static inline tt_entry_t *
2445 pmap_tt1e(pmap_t pmap,
2446 vm_map_address_t addr)
2447 {
2448 /* Level 0 currently unused */
2449 #if __ARM64_TWO_LEVEL_PMAP__
2450 #pragma unused(pmap, addr)
2451 panic("pmap_tt1e called on a two level pmap");
2452 return (NULL);
2453 #else
2454 return (&pmap->tte[tt1_index(pmap, addr)]);
2455 #endif
2456 }
2457
2458 /*
2459 * Given an offset and a map, compute the address of the level 2 translation table entry.
2460 * If the translation is invalid then PT_ENTRY_NULL is returned.
2461 */
2462 static inline tt_entry_t *
2463 pmap_tt2e(pmap_t pmap,
2464 vm_map_address_t addr)
2465 {
2466 #if __ARM64_TWO_LEVEL_PMAP__
2467 return (&pmap->tte[tt2_index(pmap, addr)]);
2468 #else
2469 tt_entry_t *ttp;
2470 tt_entry_t tte;
2471
2472 ttp = pmap_tt1e(pmap, addr);
2473 tte = *ttp;
2474 #if MACH_ASSERT
2475 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID))
2476 panic("Attempt to demote L1 block (?!): pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2477 #endif
2478 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID))
2479 return (PT_ENTRY_NULL);
2480
2481 ttp = &((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, addr)];
2482 return ((tt_entry_t *)ttp);
2483 #endif
2484 }
2485
2486
2487 /*
2488 * Given an offset and a map, compute the address of the level 3 translation table entry.
2489 * If the translation is invalid then PT_ENTRY_NULL is returned.
2490 */
2491 static inline pt_entry_t *
2492 pmap_tt3e(
2493 pmap_t pmap,
2494 vm_map_address_t addr)
2495 {
2496 pt_entry_t *ptp;
2497 tt_entry_t *ttp;
2498 tt_entry_t tte;
2499
2500 ttp = pmap_tt2e(pmap, addr);
2501 if (ttp == PT_ENTRY_NULL)
2502 return (PT_ENTRY_NULL);
2503
2504 tte = *ttp;
2505
2506 #if MACH_ASSERT
2507 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID))
2508 panic("Attempt to demote L2 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2509 #endif
2510 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
2511 return (PT_ENTRY_NULL);
2512 }
2513
2514 /* Get third-level (4KB) entry */
2515 ptp = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, addr)]);
2516 return (ptp);
2517 }
2518
2519
2520 static inline tt_entry_t *
2521 pmap_tte(
2522 pmap_t pmap,
2523 vm_map_address_t addr)
2524 {
2525 return(pmap_tt2e(pmap, addr));
2526 }
2527
2528
2529 static inline pt_entry_t *
2530 pmap_pte(
2531 pmap_t pmap,
2532 vm_map_address_t addr)
2533 {
2534 return(pmap_tt3e(pmap, addr));
2535 }
2536
2537 #endif
2538
2539
2540 /*
2541 * Map memory at initialization. The physical addresses being
2542 * mapped are not managed and are never unmapped.
2543 *
2544 * For now, VM is already on; we only need to map the
2545 * specified memory.
2546 */
2547 vm_map_address_t
2548 pmap_map(
2549 vm_map_address_t virt,
2550 vm_offset_t start,
2551 vm_offset_t end,
2552 vm_prot_t prot,
2553 unsigned int flags)
2554 {
2555 kern_return_t kr;
2556 vm_size_t ps;
2557
2558 ps = PAGE_SIZE;
2559 while (start < end) {
2560 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
2561 prot, VM_PROT_NONE, flags, FALSE);
2562
2563 if (kr != KERN_SUCCESS) {
2564 panic("%s: failed pmap_enter, "
2565 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
2566 __FUNCTION__,
2567 (void *) virt, (void *) start, (void *) end, prot, flags);
2568 }
2569
2570 virt += ps;
2571 start += ps;
2572 }
2573 return (virt);
2574 }
2575
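/*
 * Variant of pmap_map_bd() that lets the caller select the memory attribute
 * for the mapping (write-combined, posted, or uncached) via options.
 */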
2576 vm_map_address_t
2577 pmap_map_bd_with_options(
2578 vm_map_address_t virt,
2579 vm_offset_t start,
2580 vm_offset_t end,
2581 vm_prot_t prot,
2582 int32_t options)
2583 {
2584 pt_entry_t tmplate;
2585 pt_entry_t *ptep;
2586 vm_map_address_t vaddr;
2587 vm_offset_t paddr;
2588 pt_entry_t mem_attr;
2589
2590 switch (options & PMAP_MAP_BD_MASK) {
2591 case PMAP_MAP_BD_WCOMB:
2592 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
2593 #if (__ARM_VMSA__ > 7)
2594 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
2595 #else
2596 mem_attr |= ARM_PTE_SH;
2597 #endif
2598 break;
2599 case PMAP_MAP_BD_POSTED:
2600 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
2601 break;
2602 default:
2603 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
2604 break;
2605 }
2606
2607 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
2608 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
2609 #if __ARM_KERNEL_PROTECT__
2610 tmplate |= ARM_PTE_NG;
2611 #endif /* __ARM_KERNEL_PROTECT__ */
2612
2613 vaddr = virt;
2614 paddr = start;
2615 while (paddr < end) {
2616
2617 ptep = pmap_pte(kernel_pmap, vaddr);
2618 if (ptep == PT_ENTRY_NULL) {
2619 panic("pmap_map_bd");
2620 }
2621 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2622 WRITE_PTE_STRONG(ptep, tmplate);
2623
2624 pte_increment_pa(tmplate);
2625 vaddr += PAGE_SIZE;
2626 paddr += PAGE_SIZE;
2627 }
2628
2629 if (end >= start)
2630 flush_mmu_tlb_region(virt, (unsigned)(end - start));
2631
2632 return (vaddr);
2633 }
2634
2635 /*
2636 * Back-door routine for mapping kernel VM at initialization.
2637 * Useful for mapping memory outside the range
2638 * [vm_first_phys, vm_last_phys] (i.e., devices).
2639 * Otherwise like pmap_map.
2640 */
2641 vm_map_address_t
2642 pmap_map_bd(
2643 vm_map_address_t virt,
2644 vm_offset_t start,
2645 vm_offset_t end,
2646 vm_prot_t prot)
2647 {
2648 pt_entry_t tmplate;
2649 pt_entry_t *ptep;
2650 vm_map_address_t vaddr;
2651 vm_offset_t paddr;
2652
2653 /* not cacheable and not buffered */
2654 tmplate = pa_to_pte(start)
2655 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
2656 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
2657 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
2658 #if __ARM_KERNEL_PROTECT__
2659 tmplate |= ARM_PTE_NG;
2660 #endif /* __ARM_KERNEL_PROTECT__ */
2661
2662 vaddr = virt;
2663 paddr = start;
2664 while (paddr < end) {
2665
2666 ptep = pmap_pte(kernel_pmap, vaddr);
2667 if (ptep == PT_ENTRY_NULL) {
2668 panic("pmap_map_bd");
2669 }
2670 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2671 WRITE_PTE_STRONG(ptep, tmplate);
2672
2673 pte_increment_pa(tmplate);
2674 vaddr += PAGE_SIZE;
2675 paddr += PAGE_SIZE;
2676 }
2677
2678 if (end >= start)
2679 flush_mmu_tlb_region(virt, (unsigned)(end - start));
2680
2681 return (vaddr);
2682 }
2683
2684 /*
2685 * Back-door routine for mapping kernel VM at initialization.
2686 * Useful for mapping specific physical addresses in early
2687 * boot (i.e., before kernel_map is initialized).
2688 *
2689 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
2690 */
2691
2692 vm_map_address_t
2693 pmap_map_high_window_bd(
2694 vm_offset_t pa_start,
2695 vm_size_t len,
2696 vm_prot_t prot)
2697 {
2698 pt_entry_t *ptep, pte;
2699 #if (__ARM_VMSA__ == 7)
2700 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
2701 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
2702 #else
2703 vm_map_address_t va_start = VREGION1_START;
2704 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
2705 #endif
2706 vm_map_address_t va_end;
2707 vm_map_address_t va;
2708 vm_size_t offset;
2709
2710 offset = pa_start & PAGE_MASK;
2711 pa_start -= offset;
2712 len += offset;
2713
2714 if (len > (va_max - va_start)) {
2715 panic("pmap_map_high_window_bd: area too large\n");
2716 }
2717
2718 scan:
2719 for ( ; va_start < va_max; va_start += PAGE_SIZE) {
2720 ptep = pmap_pte(kernel_pmap, va_start);
2721 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2722 if (*ptep == ARM_PTE_TYPE_FAULT)
2723 break;
2724 }
2725 if (va_start > va_max) {
2726 panic("pmap_map_high_window_bd: insufficient pages\n");
2727 }
2728
2729 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
2730 ptep = pmap_pte(kernel_pmap, va_end);
2731 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2732 if (*ptep != ARM_PTE_TYPE_FAULT) {
2733 va_start = va_end + PAGE_SIZE;
2734 goto scan;
2735 }
2736 }
2737
2738 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
2739 ptep = pmap_pte(kernel_pmap, va);
2740 pte = pa_to_pte(pa_start)
2741 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
2742 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
2743 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
2744 #if (__ARM_VMSA__ > 7)
2745 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
2746 #else
2747 pte |= ARM_PTE_SH;
2748 #endif
2749 #if __ARM_KERNEL_PROTECT__
2750 pte |= ARM_PTE_NG;
2751 #endif /* __ARM_KERNEL_PROTECT__ */
2752 WRITE_PTE_STRONG(ptep, pte);
2753 }
2754 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len);
2755 #if KASAN
2756 kasan_notify_address(va_start, len);
2757 #endif
2758 return va_start;
2759 }
2760
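/*
 * Round addr up to the next multiple of align, which must be a power of two;
 * e.g. PMAP_ALIGN(0x1003, 0x100) == 0x1100.
 */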
2761 #define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
2762
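/*
 * Scan the "pmap-io-ranges" device tree property, sanity-check each range,
 * and return the number of bytes needed for the io_attr_table that will hold
 * a copy of those ranges.
 */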
2763 static vm_size_t
2764 pmap_compute_io_rgns(void)
2765 {
2766 DTEntry entry;
2767 pmap_io_range_t *ranges;
2768 uint64_t rgn_end;
2769 void *prop = NULL;
2770 int err;
2771 unsigned int prop_size;
2772
2773 err = DTLookupEntry(NULL, "/defaults", &entry);
2774 assert(err == kSuccess);
2775
2776 if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size))
2777 return 0;
2778
2779 ranges = prop;
2780 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
2781 if (ranges[i].addr & PAGE_MASK)
2782 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
2783 if (ranges[i].len & PAGE_MASK)
2784 panic("pmap I/O region %u length 0x%x is not page-aligned", i, ranges[i].len);
2785 if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end))
2786 panic("pmap I/O region %u addr 0x%llx length 0x%x wraps around", i, ranges[i].addr, ranges[i].len);
2787 if ((i == 0) || (ranges[i].addr < io_rgn_start))
2788 io_rgn_start = ranges[i].addr;
2789 if ((i == 0) || (rgn_end > io_rgn_end))
2790 io_rgn_end = rgn_end;
2791 ++num_io_rgns;
2792 }
2793
2794 if (io_rgn_start & PAGE_MASK)
2795 panic("pmap I/O region start is not page-aligned!\n");
2796
2797 if (io_rgn_end & PAGE_MASK)
2798 panic("pmap I/O region end is not page-aligned!\n");
2799
2800 if (((io_rgn_start <= gPhysBase) && (io_rgn_end > gPhysBase)) ||
2801 ((io_rgn_start < avail_end) && (io_rgn_end >= avail_end)) ||
2802 ((io_rgn_start > gPhysBase) && (io_rgn_end < avail_end)))
2803 panic("pmap I/O region overlaps physical memory!\n");
2804
2805 return (num_io_rgns * sizeof(*ranges));
2806 }
2807
2808 /*
2809 * return < 0 for a < b
2810 * 0 for a == b
2811 * > 0 for a > b
2812 */
2813 typedef int (*cmpfunc_t)(const void *a, const void *b);
2814
2815 extern void
2816 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
2817
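/*
 * qsort comparator for pmap_io_range_t: disjoint ranges are ordered by
 * address; overlapping ranges compare equal.
 */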
2818 static int
2819 cmp_io_rgns(const void *a, const void *b)
2820 {
2821 const pmap_io_range_t *range_a = a;
2822 const pmap_io_range_t *range_b = b;
2823 if ((range_b->addr + range_b->len) <= range_a->addr)
2824 return 1;
2825 else if ((range_a->addr + range_a->len) <= range_b->addr)
2826 return -1;
2827 else
2828 return 0;
2829 }
2830
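/*
 * Copy the device tree I/O ranges into io_attr_table and sort them by
 * address.
 */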
2831 static void
2832 pmap_load_io_rgns(void)
2833 {
2834 DTEntry entry;
2835 pmap_io_range_t *ranges;
2836 void *prop = NULL;
2837 int err;
2838 unsigned int prop_size;
2839
2840 if (num_io_rgns == 0)
2841 return;
2842
2843 err = DTLookupEntry(NULL, "/defaults", &entry);
2844 assert(err == kSuccess);
2845
2846 err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
2847 assert(err == kSuccess);
2848
2849 ranges = prop;
2850 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i)
2851 io_attr_table[i] = ranges[i];
2852
2853 qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
2854 }
2855
2856 #if __arm64__
2857 /*
2858 * pmap_get_arm64_prot
2859 *
2860 * Return the effective ARMv8 VMSA block protections for a pmap entry,
2861 * including any table-level AP/PXN/XN overrides.
2862 *
2863 */
2864
2865 uint64_t
2866 pmap_get_arm64_prot(
2867 pmap_t pmap,
2868 vm_offset_t addr)
2869 {
2870 uint64_t tte;
2871 uint64_t tt_type, table_ap, table_xn, table_pxn;
2872 uint64_t prot = 0;
2873
2874 tte = *pmap_tt1e(pmap, addr);
2875
2876 if (!(tte & ARM_TTE_VALID)) {
2877 return 0;
2878 }
2879
2880 tt_type = tte & ARM_TTE_TYPE_MASK;
2881
2882 if (tt_type == ARM_TTE_TYPE_BLOCK) {
2883 return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
2884 }
2885
2886 table_ap = (tte >> ARM_TTE_TABLE_APSHIFT) & 0x3;
2887 table_xn = tte & ARM_TTE_TABLE_XN;
2888 table_pxn = tte & ARM_TTE_TABLE_PXN;
2889
2890 prot |= (table_ap << ARM_TTE_BLOCK_APSHIFT) | (table_xn ? ARM_TTE_BLOCK_NX : 0) | (table_pxn ? ARM_TTE_BLOCK_PNX : 0);
2891
2892 tte = *pmap_tt2e(pmap, addr);
2893 if (!(tte & ARM_TTE_VALID)) {
2894 return 0;
2895 }
2896
2897 tt_type = tte & ARM_TTE_TYPE_MASK;
2898
2899 if (tt_type == ARM_TTE_TYPE_BLOCK) {
2900 return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
2901 }
2902
2903 table_ap = (tte >> ARM_TTE_TABLE_APSHIFT) & 0x3;
2904 table_xn = tte & ARM_TTE_TABLE_XN;
2905 table_pxn = tte & ARM_TTE_TABLE_PXN;
2906
2907 prot |= (table_ap << ARM_TTE_BLOCK_APSHIFT) | (table_xn ? ARM_TTE_BLOCK_NX : 0) | (table_pxn ? ARM_TTE_BLOCK_PNX : 0);
2908
2909 tte = *pmap_tt3e(pmap, addr);
2910 if (!(tte & ARM_TTE_VALID)) {
2911 return 0;
2912 }
2913
2914 return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
2915 }
2916 #endif /* __arm64__ */
2917
2918
2919 /*
2920 * Bootstrap the system enough to run with virtual memory.
2921 *
2922 * The early VM initialization code has already allocated
2923 * the first CPU's translation table and made entries for
2924 * all the one-to-one mappings to be found there.
2925 *
2926 * We must set up the kernel pmap structures and the
2927 * physical-to-virtual translation lookup tables for the
2928 * physical memory to be managed (between avail_start and
2929 * avail_end).
2930 *
2931 * Map the kernel's code and data, and allocate the system page table.
2932 * Page_size must already be set.
2933 *
2934 * Parameters:
2935 * first_avail first available physical page -
2936 * after kernel page tables
2937 * avail_start PA of first managed physical page
2938 * avail_end PA of last managed physical page
2939 */
2940
2941 void
2942 pmap_bootstrap(
2943 vm_offset_t vstart)
2944 {
2945 pmap_paddr_t pmap_struct_start;
2946 vm_size_t pv_head_size;
2947 vm_size_t ptd_root_table_size;
2948 vm_size_t pp_attr_table_size;
2949 vm_size_t io_attr_table_size;
2950 unsigned int npages;
2951 unsigned int i;
2952 vm_map_offset_t maxoffset;
2953
2954
2955 #if DEVELOPMENT || DEBUG
2956 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof (pmap_trace_mask))) {
2957 kprintf("Kernel traces for pmap operations enabled\n");
2958 }
2959 #endif
2960
2961 /*
2962 * Initialize the kernel pmap.
2963 */
2964 pmap_stamp = 1;
2965 kernel_pmap->tte = cpu_tte;
2966 kernel_pmap->ttep = cpu_ttep;
2967 #if (__ARM_VMSA__ > 7)
2968 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
2969 #else
2970 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
2971 #endif
2972 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
2973 kernel_pmap->ref_count = 1;
2974 kernel_pmap->gc_status = 0;
2975 kernel_pmap->nx_enabled = TRUE;
2976 #ifdef __arm64__
2977 kernel_pmap->is_64bit = TRUE;
2978 #else
2979 kernel_pmap->is_64bit = FALSE;
2980 #endif
2981 kernel_pmap->stamp = hw_atomic_add(&pmap_stamp, 1);
2982
2983 kernel_pmap->nested_region_grand_addr = 0x0ULL;
2984 kernel_pmap->nested_region_subord_addr = 0x0ULL;
2985 kernel_pmap->nested_region_size = 0x0ULL;
2986 kernel_pmap->nested_region_asid_bitmap = NULL;
2987 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
2988
2989 #if (__ARM_VMSA__ == 7)
2990 kernel_pmap->tte_index_max = 4*NTTES;
2991 #else
2992 kernel_pmap->tte_index_max = (ARM_PGBYTES / sizeof(tt_entry_t));
2993 #endif
2994 kernel_pmap->prev_tte = (tt_entry_t *) NULL;
2995
2996 PMAP_LOCK_INIT(kernel_pmap);
2997 #if (__ARM_VMSA__ == 7)
2998 simple_lock_init(&kernel_pmap->tt1_lock, 0);
2999 kernel_pmap->cpu_ref = 0;
3000 #endif
3001 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
3002
3003 /* allocate space for and initialize the bookkeeping structures */
3004 io_attr_table_size = pmap_compute_io_rgns();
3005 npages = (unsigned int)atop(mem_size);
3006 pp_attr_table_size = npages * sizeof(pp_attr_t);
3007 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
3008 #if (__ARM_VMSA__ == 7)
3009 ptd_root_table_size = sizeof(pt_desc_t) * (1<<((mem_size>>30)+12));
3010 #else
3011 ptd_root_table_size = sizeof(pt_desc_t) * (1<<((mem_size>>30)+13));
3012 #endif
3013
3014 pmap_struct_start = avail_start;
3015
3016 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
3017 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
3018 io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
3019 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t*));
3020 pv_head_table = (pv_entry_t **) phystokv(avail_start);
3021 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
3022 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
3023 avail_start = round_page(avail_start + ptd_root_table_size);
3024
3025 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
3026
3027 pmap_load_io_rgns();
3028 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size/sizeof(pt_desc_t)));
3029
3030 pmap_cpu_data_array_init();
3031
3032 vm_first_phys = gPhysBase;
3033 vm_last_phys = trunc_page(avail_end);
3034
3035 simple_lock_init(&pmaps_lock, 0);
3036 queue_init(&map_pmap_list);
3037 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
3038 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
3039 free_page_size_tt_count = 0;
3040 free_page_size_tt_max = 0;
3041 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
3042 free_two_page_size_tt_count = 0;
3043 free_two_page_size_tt_max = 0;
3044 free_tt_list = TT_FREE_ENTRY_NULL;
3045 free_tt_count = 0;
3046 free_tt_max = 0;
3047
3048 simple_lock_init(&pt_pages_lock, 0);
3049 queue_init(&pt_page_list);
3050
3051 simple_lock_init(&pmap_pages_lock, 0);
3052 pmap_pages_request_count = 0;
3053 pmap_pages_request_acum = 0;
3054 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
3055
3056 virtual_space_start = vstart;
3057 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
3058
3059 /* mark all the address spaces in use */
3060 for (i = 0; i < MAX_ASID / (sizeof(uint32_t) * NBBY); i++)
3061 asid_bitmap[i] = 0xffffffff;
3062
3063 /*
3064 * The kernel gets ASID 0, and all aliases of it. This is
3065 * important because ASID 0 is global; if we vend ASID 0
3066 * out to a user pmap, those translations will show up in
3067 * other processes through the TLB.
3068 */
3069 for (i = 0; i < MAX_ASID; i += ARM_MAX_ASID) {
3070 asid_bitmap[i / (sizeof(uint32_t) * NBBY)] &= ~(1 << (i % (sizeof(uint32_t) * NBBY)));
3071
3072 #if __ARM_KERNEL_PROTECT__
3073 assert((i + 1) < MAX_ASID);
3074 asid_bitmap[(i + 1) / (sizeof(uint32_t) * NBBY)] &= ~(1 << ((i + 1) % (sizeof(uint32_t) * NBBY)));
3075 #endif /* __ARM_KERNEL_PROTECT__ */
3076 }
3077
3078 kernel_pmap->asid = 0;
3079 kernel_pmap->vasid = 0;
3080
3081
3082 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof (maxoffset))) {
3083 maxoffset = trunc_page(maxoffset);
3084 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
3085 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
3086 arm_pmap_max_offset_default = maxoffset;
3087 }
3088 }
3089 #if defined(__arm64__)
3090 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof (maxoffset))) {
3091 maxoffset = trunc_page(maxoffset);
3092 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
3093 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
3094 arm64_pmap_max_offset_default = maxoffset;
3095 }
3096 }
3097 #endif
3098
3099 #if DEVELOPMENT || DEBUG
3100 PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof (panic_on_unsigned_execute));
3101 #endif /* DEVELOPMENT || DEBUG */
3102
3103 pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
3104 pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;
3105
3106 simple_lock_init(&phys_backup_lock, 0);
3107
3108
3109 #if MACH_ASSERT
3110 PE_parse_boot_argn("pmap_stats_assert",
3111 &pmap_stats_assert,
3112 sizeof (pmap_stats_assert));
3113 PE_parse_boot_argn("vm_footprint_suspend_allowed",
3114 &vm_footprint_suspend_allowed,
3115 sizeof (vm_footprint_suspend_allowed));
3116 #endif /* MACH_ASSERT */
3117
3118 #if KASAN
3119 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
3120 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
3121 #endif /* KASAN */
3122 }
3123
3124
3125 void
3126 pmap_virtual_space(
3127 vm_offset_t *startp,
3128 vm_offset_t *endp
3129 )
3130 {
3131 *startp = virtual_space_start;
3132 *endp = virtual_space_end;
3133 }
3134
3135
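/*
 * Report reserved kernel virtual regions.  region_select indexes successive
 * candidate ranges; FALSE is returned when the selector does not correspond
 * to a range in this configuration.
 */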
3136 boolean_t
3137 pmap_virtual_region(
3138 unsigned int region_select,
3139 vm_map_offset_t *startp,
3140 vm_map_size_t *size
3141 )
3142 {
3143 boolean_t ret = FALSE;
3144 #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
3145 if (region_select == 0) {
3146 /*
3147 * In this config, the bootstrap mappings should occupy their own L2
3148 * TTs, as they should be immutable after boot. Having the associated
3149 * TTEs and PTEs in their own pages allows us to lock down those pages,
3150 * while allowing the rest of the kernel address range to be remapped.
3151 */
3152 #if (__ARM_VMSA__ > 7)
3153 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
3154 #else
3155 #error Unsupported configuration
3156 #endif
3157 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
3158 ret = TRUE;
3159 }
3160 #else
3161 #if (__ARM_VMSA__ > 7)
3162 unsigned long low_global_vr_mask = 0;
3163 vm_map_size_t low_global_vr_size = 0;
3164 #endif
3165
3166 if (region_select == 0) {
3167 #if (__ARM_VMSA__ == 7)
3168 *startp = gVirtBase & 0xFFC00000;
3169 *size = ((virtual_space_start-(gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
3170 #else
3171 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
3172 if (!TEST_PAGE_SIZE_4K) {
3173 *startp = gVirtBase & 0xFFFFFFFFFE000000;
3174 *size = ((virtual_space_start-(gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
3175 } else {
3176 *startp = gVirtBase & 0xFFFFFFFFFF800000;
3177 *size = ((virtual_space_start-(gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
3178 }
3179 #endif
3180 ret = TRUE;
3181 }
3182 if (region_select == 1) {
3183 *startp = VREGION1_START;
3184 *size = VREGION1_SIZE;
3185 ret = TRUE;
3186 }
3187 #if (__ARM_VMSA__ > 7)
3188 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
3189 if (!TEST_PAGE_SIZE_4K) {
3190 low_global_vr_mask = 0xFFFFFFFFFE000000;
3191 low_global_vr_size = 0x2000000;
3192 } else {
3193 low_global_vr_mask = 0xFFFFFFFFFF800000;
3194 low_global_vr_size = 0x800000;
3195 }
3196
3197 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
3198 *startp = LOW_GLOBAL_BASE_ADDRESS;
3199 *size = low_global_vr_size;
3200 ret = TRUE;
3201 }
3202
3203 if (region_select == 3) {
3204 /* In this config, we allow the bootstrap mappings to occupy the same
3205 * page table pages as the heap.
3206 */
3207 *startp = VM_MIN_KERNEL_ADDRESS;
3208 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
3209 ret = TRUE;
3210 }
3211 #endif
3212 #endif
3213 return ret;
3214 }
3215
3216 unsigned int
3217 pmap_free_pages(
3218 void)
3219 {
3220 return (unsigned int)atop(avail_end - first_avail);
3221 }
3222
3223
3224 boolean_t
3225 pmap_next_page_hi(
3226 ppnum_t * pnum)
3227 {
3228 return pmap_next_page(pnum);
3229 }
3230
3231
3232 boolean_t
3233 pmap_next_page(
3234 ppnum_t *pnum)
3235 {
3236 if (first_avail != avail_end) {
3237 *pnum = (ppnum_t)atop(first_avail);
3238 first_avail += PAGE_SIZE;
3239 return TRUE;
3240 }
3241 return FALSE;
3242 }
3243
3244
3245 /*
3246 * Initialize the pmap module.
3247 * Called by vm_init, to initialize any structures that the pmap
3248 * system needs to map virtual memory.
3249 */
3250 void
3251 pmap_init(
3252 void)
3253 {
3254 /*
3255 * Protect page zero in the kernel map.
3256 * (can be overridden by permanent translation
3257 * table entries at page zero - see arm_vm_init).
3258 */
3259 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
3260
3261 pmap_initialized = TRUE;
3262
3263 pmap_zone_init();
3264
3265
3266 /*
3267 * Initialize the pmap object (for tracking the vm_page_t
3268 * structures for pages we allocate to be page tables in
3269 * pmap_expand()).
3270 */
3271 _vm_object_allocate(mem_size, pmap_object);
3272 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3273
3274 pv_init();
3275
3276 /*
3277 * The value of hard_maxproc may have been scaled; make sure
3278 * it is still less than the value of MAX_ASID.
3279 */
3280 assert(hard_maxproc < MAX_ASID);
3281
3282 #if CONFIG_PGTRACE
3283 pmap_pgtrace_init();
3284 #endif
3285 }
3286
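/*
 * Return TRUE if the given managed physical page has no PV mappings;
 * non-managed pages always return FALSE.
 */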
3287 boolean_t
3288 pmap_verify_free(
3289 ppnum_t ppnum)
3290 {
3291 pv_entry_t **pv_h;
3292 int pai;
3293 pmap_paddr_t phys = ptoa(ppnum);
3294
3295 assert(phys != vm_page_fictitious_addr);
3296
3297 if (!pa_valid(phys))
3298 return (FALSE);
3299
3300 pai = (int)pa_index(phys);
3301 pv_h = pai_to_pvh(pai);
3302
3303 return (pvh_test_type(pv_h, PVH_TYPE_NULL));
3304 }
3305
3306
3307 /*
3308 * Initialize zones used by pmap.
3309 */
3310 static void
3311 pmap_zone_init(
3312 void)
3313 {
3314 /*
3315 * Create the zone of physical maps
3316 * and the physical-to-virtual entries.
3317 */
3318 pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap)*256,
3319 PAGE_SIZE, "pmap");
3320 }
3321
3322
3323 void
3324 pmap_ledger_alloc_init(size_t size)
3325 {
3326 panic("%s: unsupported, "
3327 "size=%lu",
3328 __func__, size);
3329 }
3330
3331 ledger_t
3332 pmap_ledger_alloc(void)
3333 {
3334 ledger_t retval = NULL;
3335
3336 panic("%s: unsupported",
3337 __func__);
3338
3339 return retval;
3340 }
3341
3342 void
3343 pmap_ledger_free(ledger_t ledger)
3344 {
3345 panic("%s: unsupported, "
3346 "ledger=%p",
3347 __func__, ledger);
3348 }
3349
3350 /*
3351 * Create and return a physical map.
3352 *
3353 * If the size specified for the map
3354 * is zero, the map is an actual physical
3355 * map, and may be referenced by the
3356 * hardware.
3357 *
3358 * If the size specified is non-zero,
3359 * the map will be used in software only, and
3360 * is bounded by that size.
3361 */
3362 MARK_AS_PMAP_TEXT static pmap_t
3363 pmap_create_internal(
3364 ledger_t ledger,
3365 vm_map_size_t size,
3366 boolean_t is_64bit)
3367 {
3368 unsigned i;
3369 pmap_t p;
3370
3371 /*
3372 * A software use-only map doesn't even need a pmap.
3373 */
3374 if (size != 0) {
3375 return (PMAP_NULL);
3376 }
3377
3378
3379 /*
3380 * Allocate a pmap struct from the pmap_zone. Then allocate
3381 * the translation table of the right size for the pmap.
3382 */
3383 if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL)
3384 return (PMAP_NULL);
3385
3386 if (is_64bit) {
3387 p->min = MACH_VM_MIN_ADDRESS;
3388 p->max = MACH_VM_MAX_ADDRESS;
3389 } else {
3390 p->min = VM_MIN_ADDRESS;
3391 p->max = VM_MAX_ADDRESS;
3392 }
3393
3394 p->nested_region_true_start = 0;
3395 p->nested_region_true_end = ~0;
3396
3397 p->ref_count = 1;
3398 p->gc_status = 0;
3399 p->stamp = hw_atomic_add(&pmap_stamp, 1);
3400 p->nx_enabled = TRUE;
3401 p->is_64bit = is_64bit;
3402 p->nested = FALSE;
3403 p->nested_pmap = PMAP_NULL;
3404
3405
3406
3407 p->ledger = ledger;
3408
3409 PMAP_LOCK_INIT(p);
3410 #if (__ARM_VMSA__ == 7)
3411 simple_lock_init(&p->tt1_lock, 0);
3412 p->cpu_ref = 0;
3413 #endif
3414 memset((void *) &p->stats, 0, sizeof(p->stats));
3415
3416 p->tt_entry_free = (tt_entry_t *)0;
3417
3418 p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
3419 p->ttep = ml_static_vtop((vm_offset_t)p->tte);
3420 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
3421
3422 #if (__ARM_VMSA__ == 7)
3423 p->tte_index_max = NTTES;
3424 #else
3425 p->tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t));
3426 #endif
3427 p->prev_tte = (tt_entry_t *) NULL;
3428
3429 /* nullify the translation table */
3430 for (i = 0; i < p->tte_index_max; i++)
3431 p->tte[i] = ARM_TTE_TYPE_FAULT;
3432
3433 FLUSH_PTE_RANGE(p->tte, p->tte + p->tte_index_max);
3434
3435 /* assign an ASID */
3436 p->vasid = alloc_asid();
3437 p->asid = p->vasid % ARM_MAX_ASID;
3438
3439 /*
3440 * initialize the rest of the structure
3441 */
3442 p->nested_region_grand_addr = 0x0ULL;
3443 p->nested_region_subord_addr = 0x0ULL;
3444 p->nested_region_size = 0x0ULL;
3445 p->nested_region_asid_bitmap = NULL;
3446 p->nested_region_asid_bitmap_size = 0x0UL;
3447
3448 p->nested_has_no_bounds_ref = false;
3449 p->nested_no_bounds_refcnt = 0;
3450 p->nested_bounds_set = false;
3451
3452
3453 #if MACH_ASSERT
3454 p->pmap_stats_assert = TRUE;
3455 p->pmap_pid = 0;
3456 strlcpy(p->pmap_procname, "<nil>", sizeof (p->pmap_procname));
3457 #endif /* MACH_ASSERT */
3458 #if DEVELOPMENT || DEBUG
3459 p->footprint_was_suspended = FALSE;
3460 #endif /* DEVELOPMENT || DEBUG */
3461
3462 pmap_simple_lock(&pmaps_lock);
3463 queue_enter(&map_pmap_list, p, pmap_t, pmaps);
3464 pmap_simple_unlock(&pmaps_lock);
3465
3466 return (p);
3467 }
3468
3469 pmap_t
3470 pmap_create(
3471 ledger_t ledger,
3472 vm_map_size_t size,
3473 boolean_t is_64bit)
3474 {
3475 pmap_t pmap;
3476
3477 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, is_64bit);
3478
3479 ledger_reference(ledger);
3480
3481 pmap = pmap_create_internal(ledger, size, is_64bit);
3482
3483 if (pmap == PMAP_NULL) {
3484 ledger_dereference(ledger);
3485 }
3486
3487 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
3488
3489 return pmap;
3490 }
3491
3492 #if MACH_ASSERT
3493 MARK_AS_PMAP_TEXT static void
3494 pmap_set_process_internal(
3495 __unused pmap_t pmap,
3496 __unused int pid,
3497 __unused char *procname)
3498 {
3499 #if MACH_ASSERT
3500 if (pmap == NULL) {
3501 return;
3502 }
3503
3504 VALIDATE_PMAP(pmap);
3505
3506 pmap->pmap_pid = pid;
3507 strlcpy(pmap->pmap_procname, procname, sizeof (pmap->pmap_procname));
3508 if (pmap_ledgers_panic_leeway) {
3509 /*
3510 * XXX FBDP
3511 * Some processes somehow trigger some issues that make
3512 * the pmap stats and ledgers go off track, causing
3513 * some assertion failures and ledger panics.
3514 * Turn off the sanity checks if we allow some ledger leeway
3515 * because of that. We'll still do a final check in
3516 * pmap_check_ledgers() for discrepancies larger than the
3517 * allowed leeway after the address space has been fully
3518 * cleaned up.
3519 */
3520 pmap->pmap_stats_assert = FALSE;
3521 ledger_disable_panic_on_negative(pmap->ledger,
3522 task_ledgers.phys_footprint);
3523 ledger_disable_panic_on_negative(pmap->ledger,
3524 task_ledgers.internal);
3525 ledger_disable_panic_on_negative(pmap->ledger,
3526 task_ledgers.internal_compressed);
3527 ledger_disable_panic_on_negative(pmap->ledger,
3528 task_ledgers.iokit_mapped);
3529 ledger_disable_panic_on_negative(pmap->ledger,
3530 task_ledgers.alternate_accounting);
3531 ledger_disable_panic_on_negative(pmap->ledger,
3532 task_ledgers.alternate_accounting_compressed);
3533 }
3534 #endif /* MACH_ASSERT */
3535 }
3536 #endif /* MACH_ASSERT*/
3537
3538 #if MACH_ASSERT
3539 void
3540 pmap_set_process(
3541 pmap_t pmap,
3542 int pid,
3543 char *procname)
3544 {
3545 pmap_set_process_internal(pmap, pid, procname);
3546 }
3547
3548 /*
3549 * We maintain stats and ledgers so that a task's physical footprint is:
3550 * phys_footprint = ((internal - alternate_accounting)
3551 * + (internal_compressed - alternate_accounting_compressed)
3552 * + iokit_mapped
3553 * + purgeable_nonvolatile
3554 * + purgeable_nonvolatile_compressed
3555 * + page_table)
3556 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
3557 */
3558
3559 struct {
3560 uint64_t num_pmaps_checked;
3561
3562 int phys_footprint_over;
3563 ledger_amount_t phys_footprint_over_total;
3564 ledger_amount_t phys_footprint_over_max;
3565 int phys_footprint_under;
3566 ledger_amount_t phys_footprint_under_total;
3567 ledger_amount_t phys_footprint_under_max;
3568
3569 int internal_over;
3570 ledger_amount_t internal_over_total;
3571 ledger_amount_t internal_over_max;
3572 int internal_under;
3573 ledger_amount_t internal_under_total;
3574 ledger_amount_t internal_under_max;
3575
3576 int internal_compressed_over;
3577 ledger_amount_t internal_compressed_over_total;
3578 ledger_amount_t internal_compressed_over_max;
3579 int internal_compressed_under;
3580 ledger_amount_t internal_compressed_under_total;
3581 ledger_amount_t internal_compressed_under_max;
3582
3583 int iokit_mapped_over;
3584 ledger_amount_t iokit_mapped_over_total;
3585 ledger_amount_t iokit_mapped_over_max;
3586 int iokit_mapped_under;
3587 ledger_amount_t iokit_mapped_under_total;
3588 ledger_amount_t iokit_mapped_under_max;
3589
3590 int alternate_accounting_over;
3591 ledger_amount_t alternate_accounting_over_total;
3592 ledger_amount_t alternate_accounting_over_max;
3593 int alternate_accounting_under;
3594 ledger_amount_t alternate_accounting_under_total;
3595 ledger_amount_t alternate_accounting_under_max;
3596
3597 int alternate_accounting_compressed_over;
3598 ledger_amount_t alternate_accounting_compressed_over_total;
3599 ledger_amount_t alternate_accounting_compressed_over_max;
3600 int alternate_accounting_compressed_under;
3601 ledger_amount_t alternate_accounting_compressed_under_total;
3602 ledger_amount_t alternate_accounting_compressed_under_max;
3603
3604 int page_table_over;
3605 ledger_amount_t page_table_over_total;
3606 ledger_amount_t page_table_over_max;
3607 int page_table_under;
3608 ledger_amount_t page_table_under_total;
3609 ledger_amount_t page_table_under_max;
3610
3611 int purgeable_volatile_over;
3612 ledger_amount_t purgeable_volatile_over_total;
3613 ledger_amount_t purgeable_volatile_over_max;
3614 int purgeable_volatile_under;
3615 ledger_amount_t purgeable_volatile_under_total;
3616 ledger_amount_t purgeable_volatile_under_max;
3617
3618 int purgeable_nonvolatile_over;
3619 ledger_amount_t purgeable_nonvolatile_over_total;
3620 ledger_amount_t purgeable_nonvolatile_over_max;
3621 int purgeable_nonvolatile_under;
3622 ledger_amount_t purgeable_nonvolatile_under_total;
3623 ledger_amount_t purgeable_nonvolatile_under_max;
3624
3625 int purgeable_volatile_compressed_over;
3626 ledger_amount_t purgeable_volatile_compressed_over_total;
3627 ledger_amount_t purgeable_volatile_compressed_over_max;
3628 int purgeable_volatile_compressed_under;
3629 ledger_amount_t purgeable_volatile_compressed_under_total;
3630 ledger_amount_t purgeable_volatile_compressed_under_max;
3631
3632 int purgeable_nonvolatile_compressed_over;
3633 ledger_amount_t purgeable_nonvolatile_compressed_over_total;
3634 ledger_amount_t purgeable_nonvolatile_compressed_over_max;
3635 int purgeable_nonvolatile_compressed_under;
3636 ledger_amount_t purgeable_nonvolatile_compressed_under_total;
3637 ledger_amount_t purgeable_nonvolatile_compressed_under_max;
3638
3639 int network_volatile_over;
3640 ledger_amount_t network_volatile_over_total;
3641 ledger_amount_t network_volatile_over_max;
3642 int network_volatile_under;
3643 ledger_amount_t network_volatile_under_total;
3644 ledger_amount_t network_volatile_under_max;
3645
3646 int network_nonvolatile_over;
3647 ledger_amount_t network_nonvolatile_over_total;
3648 ledger_amount_t network_nonvolatile_over_max;
3649 int network_nonvolatile_under;
3650 ledger_amount_t network_nonvolatile_under_total;
3651 ledger_amount_t network_nonvolatile_under_max;
3652
3653 int network_volatile_compressed_over;
3654 ledger_amount_t network_volatile_compressed_over_total;
3655 ledger_amount_t network_volatile_compressed_over_max;
3656 int network_volatile_compressed_under;
3657 ledger_amount_t network_volatile_compressed_under_total;
3658 ledger_amount_t network_volatile_compressed_under_max;
3659
3660 int network_nonvolatile_compressed_over;
3661 ledger_amount_t network_nonvolatile_compressed_over_total;
3662 ledger_amount_t network_nonvolatile_compressed_over_max;
3663 int network_nonvolatile_compressed_under;
3664 ledger_amount_t network_nonvolatile_compressed_under_total;
3665 ledger_amount_t network_nonvolatile_compressed_under_max;
3666 } pmap_ledgers_drift;
3667 #endif /* MACH_ASSERT */
3668
3669 /*
3670 * Retire the given physical map from service.
3671 * Should only be called if the map contains
3672 * no valid mappings.
3673 */
3674 MARK_AS_PMAP_TEXT static void
3675 pmap_destroy_internal(
3676 pmap_t pmap)
3677 {
3678 if (pmap == PMAP_NULL)
3679 return;
3680
3681 VALIDATE_PMAP(pmap);
3682
3683 int32_t ref_count = __c11_atomic_fetch_sub(&pmap->ref_count, 1, memory_order_relaxed) - 1;
3684 if (ref_count > 0)
3685 return;
3686 else if (ref_count < 0)
3687 panic("pmap %p: refcount underflow", pmap);
3688 else if (pmap == kernel_pmap)
3689 panic("pmap %p: attempt to destroy kernel pmap", pmap);
3690
3691 #if (__ARM_VMSA__ == 7)
3692 pt_entry_t *ttep;
3693 unsigned int i;
3694
3695 pmap_simple_lock(&pmaps_lock);
3696 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
3697 pmap->gc_status |= PMAP_GC_WAIT;
3698 assert_wait((event_t) & pmap->gc_status, THREAD_UNINT);
3699 pmap_simple_unlock(&pmaps_lock);
3700 (void) thread_block(THREAD_CONTINUE_NULL);
3701 pmap_simple_lock(&pmaps_lock);
3702
3703 }
3704 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
3705 pmap_simple_unlock(&pmaps_lock);
3706
3707 if (pmap->cpu_ref != 0)
3708 panic("pmap_destroy(%p): cpu_ref = %u", pmap, pmap->cpu_ref);
3709
3710 pmap_trim_self(pmap);
3711
3712 /*
3713 * Free the memory maps, then the
3714 * pmap structure.
3715 */
3716 PMAP_LOCK(pmap);
3717 for (i = 0; i < pmap->tte_index_max; i++) {
3718 ttep = &pmap->tte[i];
3719 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3720 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
3721 }
3722 }
3723 PMAP_UNLOCK(pmap);
3724
3725 if (pmap->tte) {
3726 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max*sizeof(tt_entry_t), 0);
3727 pmap->tte = (tt_entry_t *) NULL;
3728 pmap->ttep = 0;
3729 pmap->tte_index_max = 0;
3730 }
3731 if (pmap->prev_tte) {
3732 pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
3733 pmap->prev_tte = (tt_entry_t *) NULL;
3734 }
3735 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3736
3737 flush_mmu_tlb_asid(pmap->asid);
3738 /* return its asid to the pool */
3739 free_asid(pmap->vasid);
3740 pmap_check_ledgers(pmap);
3741
3742
3743 if (pmap->nested_region_asid_bitmap)
3744 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size*sizeof(unsigned int));
3745 zfree(pmap_zone, pmap);
3746 #else /* __ARM_VMSA__ == 7 */
3747 pt_entry_t *ttep;
3748 pmap_paddr_t pa;
3749 vm_map_address_t c;
3750
3751 pmap_unmap_sharedpage(pmap);
3752
3753 pmap_simple_lock(&pmaps_lock);
3754 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
3755 pmap->gc_status |= PMAP_GC_WAIT;
3756 assert_wait((event_t) & pmap->gc_status, THREAD_UNINT);
3757 pmap_simple_unlock(&pmaps_lock);
3758 (void) thread_block(THREAD_CONTINUE_NULL);
3759 pmap_simple_lock(&pmaps_lock);
3760 }
3761 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
3762 pmap_simple_unlock(&pmaps_lock);
3763
3764 pmap_trim_self(pmap);
3765
3766 /*
3767 * Free the memory maps, then the
3768 * pmap structure.
3769 */
3770 for (c = pmap->min; c < pmap->max; c += ARM_TT_L2_SIZE) {
3771 ttep = pmap_tt2e(pmap, c);
3772 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3773 PMAP_LOCK(pmap);
3774 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L2_LEVEL);
3775 PMAP_UNLOCK(pmap);
3776 }
3777 }
3778 #if !__ARM64_TWO_LEVEL_PMAP__
3779 for (c = pmap->min; c < pmap->max; c += ARM_TT_L1_SIZE) {
3780 ttep = pmap_tt1e(pmap, c);
3781 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3782 PMAP_LOCK(pmap);
3783 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
3784 PMAP_UNLOCK(pmap);
3785 }
3786 }
3787 #endif
3788
3789
3790 if (pmap->tte) {
3791 pa = pmap->ttep;
3792 pmap_tt1_deallocate(pmap, (tt_entry_t *)phystokv(pa), PMAP_ROOT_ALLOC_SIZE, 0);
3793 }
3794
3795 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3796 flush_mmu_tlb_asid((uint64_t)(pmap->asid) << TLBI_ASID_SHIFT);
3797 free_asid(pmap->vasid);
3798
3799 if (pmap->nested_region_asid_bitmap) {
3800 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size*sizeof(unsigned int));
3801 }
3802
3803 pmap_check_ledgers(pmap);
3804
3805 zfree(pmap_zone, pmap);
3806
3807 #endif /* __ARM_VMSA__ == 7 */
3808 }
3809
3810 void
3811 pmap_destroy(
3812 pmap_t pmap)
3813 {
3814 ledger_t ledger;
3815
3816 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
3817
3818 ledger = pmap->ledger;
3819
3820 pmap_destroy_internal(pmap);
3821
3822 ledger_dereference(ledger);
3823
3824 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
3825 }
3826
3827
3828 /*
3829 * Add a reference to the specified pmap.
3830 */
3831 MARK_AS_PMAP_TEXT static void
3832 pmap_reference_internal(
3833 pmap_t pmap)
3834 {
3835 if (pmap != PMAP_NULL) {
3836 VALIDATE_PMAP(pmap);
3837 __c11_atomic_fetch_add(&pmap->ref_count, 1, memory_order_relaxed);
3838 }
3839 }
3840
3841 void
3842 pmap_reference(
3843 pmap_t pmap)
3844 {
3845 pmap_reference_internal(pmap);
3846 }
3847
3848 static tt_entry_t *
3849 pmap_tt1_allocate(
3850 pmap_t pmap,
3851 vm_size_t size,
3852 unsigned option)
3853 {
3854 tt_entry_t *tt1;
3855 tt_free_entry_t *tt1_free;
3856 pmap_paddr_t pa;
3857 vm_address_t va;
3858 vm_address_t va_end;
3859 kern_return_t ret;
3860
3861 pmap_simple_lock(&pmaps_lock);
3862 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
3863 free_page_size_tt_count--;
3864 tt1 = (tt_entry_t *)free_page_size_tt_list;
3865 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
3866 pmap_simple_unlock(&pmaps_lock);
3867 pmap_tt_ledger_credit(pmap, size);
3868 return (tt_entry_t *)tt1;
3869 }
3870 if ((size == 2*PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
3871 free_two_page_size_tt_count--;
3872 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
3873 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
3874 pmap_simple_unlock(&pmaps_lock);
3875 pmap_tt_ledger_credit(pmap, size);
3876 return (tt_entry_t *)tt1;
3877 }
3878 if (free_tt_count != 0) {
3879 free_tt_count--;
3880 tt1 = (tt_entry_t *)free_tt_list;
3881 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
3882 pmap_simple_unlock(&pmaps_lock);
3883 pmap_tt_ledger_credit(pmap, size);
3884 return (tt_entry_t *)tt1;
3885 }
3886
3887 pmap_simple_unlock(&pmaps_lock);
3888
3889 ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
3890
3891 if(ret == KERN_RESOURCE_SHORTAGE)
3892 return (tt_entry_t *)0;
3893
3894
3895 if (size < PAGE_SIZE) {
3896 pmap_simple_lock(&pmaps_lock);
3897
3898 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + size; va < va_end; va = va+size) {
3899 tt1_free = (tt_free_entry_t *)va;
3900 tt1_free->next = free_tt_list;
3901 free_tt_list = tt1_free;
3902 free_tt_count++;
3903 }
3904 if (free_tt_count > free_tt_max)
3905 free_tt_max = free_tt_count;
3906
3907 pmap_simple_unlock(&pmaps_lock);
3908 }
3909
3910 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained via the arm_pt_root_size sysctl.
3911 * Depending on the device, this can vary between 512 bytes and 16KB. */
3912 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3913 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
3914 pmap_tt_ledger_credit(pmap, size);
3915
3916 return (tt_entry_t *) phystokv(pa);
3917 }
3918
3919 static void
3920 pmap_tt1_deallocate(
3921 pmap_t pmap,
3922 tt_entry_t *tt,
3923 vm_size_t size,
3924 unsigned option)
3925 {
3926 tt_free_entry_t *tt_entry;
3927
3928 tt_entry = (tt_free_entry_t *)tt;
3929 if (not_in_kdp)
3930 pmap_simple_lock(&pmaps_lock);
3931
3932 if (size < PAGE_SIZE) {
3933 free_tt_count++;
3934 if (free_tt_count > free_tt_max)
3935 free_tt_max = free_tt_count;
3936 tt_entry->next = free_tt_list;
3937 free_tt_list = tt_entry;
3938 }
3939
3940 if (size == PAGE_SIZE) {
3941 free_page_size_tt_count++;
3942 if (free_page_size_tt_count > free_page_size_tt_max)
3943 free_page_size_tt_max = free_page_size_tt_count;
3944 tt_entry->next = free_page_size_tt_list;
3945 free_page_size_tt_list = tt_entry;
3946 }
3947
3948 if (size == 2*PAGE_SIZE) {
3949 free_two_page_size_tt_count++;
3950 if (free_two_page_size_tt_count > free_two_page_size_tt_max)
3951 free_two_page_size_tt_max = free_two_page_size_tt_count;
3952 tt_entry->next = free_two_page_size_tt_list;
3953 free_two_page_size_tt_list = tt_entry;
3954 }
3955
3956 if ((option & PMAP_TT_DEALLOCATE_NOBLOCK) || (!not_in_kdp)) {
3957 if (not_in_kdp)
3958 pmap_simple_unlock(&pmaps_lock);
3959 pmap_tt_ledger_debit(pmap, size);
3960 return;
3961 }
3962
3963 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
3964
3965 free_page_size_tt_count--;
3966 tt = (tt_entry_t *)free_page_size_tt_list;
3967 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
3968
3969 pmap_simple_unlock(&pmaps_lock);
3970
3971 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
3972
3973 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3974
3975 pmap_simple_lock(&pmaps_lock);
3976 }
3977
3978 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
3979 free_two_page_size_tt_count--;
3980 tt = (tt_entry_t *)free_two_page_size_tt_list;
3981 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
3982
3983 pmap_simple_unlock(&pmaps_lock);
3984
3985 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2*PAGE_SIZE);
3986
3987 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3988
3989 pmap_simple_lock(&pmaps_lock);
3990 }
3991 pmap_simple_unlock(&pmaps_lock);
3992 pmap_tt_ledger_debit(pmap, size);
3993 }
3994
3995 static kern_return_t
3996 pmap_tt_allocate(
3997 pmap_t pmap,
3998 tt_entry_t **ttp,
3999 unsigned int level,
4000 unsigned int options)
4001 {
4002 pmap_paddr_t pa;
4003 *ttp = NULL;
4004
4005 PMAP_LOCK(pmap);
4006 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
4007 tt_free_entry_t *tt_free_next;
4008
4009 tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
4010 *ttp = (tt_entry_t *)pmap->tt_entry_free;
4011 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
4012 }
4013 PMAP_UNLOCK(pmap);
4014
4015 if (*ttp == NULL) {
4016 pt_desc_t *ptdp;
4017
4018 /*
4019 * Allocate a VM page for the level x page table entries.
4020 */
4021 while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
4022 if(options & PMAP_OPTIONS_NOWAIT) {
4023 return KERN_RESOURCE_SHORTAGE;
4024 }
4025 VM_PAGE_WAIT();
4026 }
4027
4028 if (level < PMAP_TT_MAX_LEVEL) {
4029 OSAddAtomic64(1, &alloc_ttepages_count);
4030 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
4031 } else {
4032 OSAddAtomic64(1, &alloc_ptepages_count);
4033 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
4034 }
4035
4036 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
4037
4038 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
4039
4040 ptdp = ptd_alloc(pmap);
4041 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
4042
4043 __unreachable_ok_push
4044 if (TEST_PAGE_RATIO_4) {
4045 vm_address_t va;
4046 vm_address_t va_end;
4047
4048 PMAP_LOCK(pmap);
4049
4050 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va+ARM_PGBYTES) {
4051 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4052 pmap->tt_entry_free = (tt_entry_t *)va;
4053 }
4054 PMAP_UNLOCK(pmap);
4055 }
4056 __unreachable_ok_pop
4057
4058 *ttp = (tt_entry_t *)phystokv(pa);
4059 }
4060
4061
4062 return KERN_SUCCESS;
4063 }
4064
4065
4066 static void
4067 pmap_tt_deallocate(
4068 pmap_t pmap,
4069 tt_entry_t *ttp,
4070 unsigned int level)
4071 {
4072 pt_desc_t *ptdp;
4073 unsigned pt_acc_cnt;
4074 unsigned i, max_pt_index = PAGE_RATIO;
4075 vm_offset_t free_page=0;
4076
4077 PMAP_LOCK(pmap);
4078
4079 ptdp = ptep_get_ptd((vm_offset_t)ttp);
4080
4081 ptdp->pt_map[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;
4082
4083 if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT))
4084 ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
4085
4086 if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt != 0)
4087 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt);
4088
4089 ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
4090
4091 for (i = 0, pt_acc_cnt = 0 ; i < max_pt_index ; i++)
4092 pt_acc_cnt += ptdp->pt_cnt[i].refcnt;
4093
4094 if (pt_acc_cnt == 0) {
4095 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
4096 unsigned pt_free_entry_cnt = 1;
4097
4098 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
4099 tt_free_entry_t *tt_free_list_next;
4100
4101 tt_free_list_next = tt_free_list->next;
4102 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
4103 pt_free_entry_cnt++;
4104 }
4105 tt_free_list = tt_free_list_next;
4106 }
4107 if (pt_free_entry_cnt == max_pt_index) {
4108 tt_free_entry_t *tt_free_list_cur;
4109
4110 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
4111 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
4112 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
4113
4114 while (tt_free_list_cur) {
4115 tt_free_entry_t *tt_free_list_next;
4116
4117 tt_free_list_next = tt_free_list_cur->next;
4118 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
4119 tt_free_list->next = tt_free_list_next->next;
4120 } else {
4121 tt_free_list = tt_free_list_next;
4122 }
4123 tt_free_list_cur = tt_free_list_next;
4124 }
4125 } else {
4126 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4127 pmap->tt_entry_free = ttp;
4128 }
4129 } else {
4130 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4131 pmap->tt_entry_free = ttp;
4132 }
4133
4134 PMAP_UNLOCK(pmap);
4135
4136 if (free_page != 0) {
4137
4138 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
4139 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
4140 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
4141 if (level < PMAP_TT_MAX_LEVEL)
4142 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
4143 else
4144 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
4145 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
4146 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
4147 }
4148 }
4149
4150 static void
4151 pmap_tte_remove(
4152 pmap_t pmap,
4153 tt_entry_t *ttep,
4154 unsigned int level)
4155 {
4156 tt_entry_t tte = *ttep;
4157
4158 if (tte == 0) {
4159 panic("pmap_tte_remove(): null tt_entry ttep==%p\n", ttep);
4160 }
4161
4162 if (((level+1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
4163 panic("pmap_tte_remove(): pmap=%p ttep=%p ptd=%p refcnt=0x%x\n", pmap, ttep,
4164 tte_get_ptd(tte), (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
4165 }
4166
4167 #if (__ARM_VMSA__ == 7)
4168 {
4169 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
4170 unsigned i;
4171
4172 for (i = 0; i<4; i++, ttep_4M++)
4173 *ttep_4M = (tt_entry_t) 0;
4174 FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
4175 }
4176 #else
4177 *ttep = (tt_entry_t) 0;
4178 FLUSH_PTE_STRONG(ttep);
4179 #endif
4180 }
4181
4182 static void
4183 pmap_tte_deallocate(
4184 pmap_t pmap,
4185 tt_entry_t *ttep,
4186 unsigned int level)
4187 {
4188 pmap_paddr_t pa;
4189 tt_entry_t tte;
4190
4191 PMAP_ASSERT_LOCKED(pmap);
4192
4193 tte = *ttep;
4194
4195 #if MACH_ASSERT
4196 if (tte_get_ptd(tte)->pmap != pmap) {
4197 panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
4198 tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
4199 }
4200 #endif
4201
4202 pmap_tte_remove(pmap, ttep, level);
4203
4204 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4205 #if MACH_ASSERT
4206 {
4207 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
4208 unsigned i;
4209
4210 for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++,pte_p++) {
4211 if (ARM_PTE_IS_COMPRESSED(*pte_p)) {
4212 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
4213 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4214 } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
4215 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
4216 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4217 }
4218 }
4219 }
4220 #endif
4221 PMAP_UNLOCK(pmap);
4222
4223 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
4224 * aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
4225 pa = tte_to_pa(tte) & ~ARM_PGMASK;
4226 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level+1);
4227 PMAP_LOCK(pmap);
4228 }
4229 }
4230
4231 /*
4232 * Remove a range of hardware page-table entries.
4233 * The entries given are the first (inclusive)
4234 * and last (exclusive) entries for the VM pages.
4235 * The virtual address is the va for the first pte.
4236 *
4237 * The pmap must be locked.
4238 * If the pmap is not the kernel pmap, the range must lie
4239 * entirely within one pte-page. This is NOT checked.
4240 * Assumes that the pte-page exists.
4241 *
4242 * Returns the number of PTEs changed, and sets *rmv_cnt
4243 * to the number of SPTEs changed.
4244 */
4245 static int
4246 pmap_remove_range(
4247 pmap_t pmap,
4248 vm_map_address_t va,
4249 pt_entry_t *bpte,
4250 pt_entry_t *epte,
4251 uint32_t *rmv_cnt)
4252 {
4253 return pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
4254 PMAP_OPTIONS_REMOVE);
4255 }
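#if 0
/*
 * Illustrative sketch only (not part of the pmap implementation): how a
 * caller honors the contract described above.  The PTE window is derived
 * from a single twig-level TTE, so the range cannot span pte-pages, the
 * pmap lock is held across the call, and the TLB flush remains the
 * caller's responsibility (compare pmap_remove_options_internal() and
 * PMAP_UPDATE_TLBS() below).
 */
static void
example_remove_one_leaf_range(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	tt_entry_t *tte_p;
	pt_entry_t *bpte, *epte;
	uint32_t rmv_cnt = 0;

	PMAP_LOCK(pmap);
	tte_p = pmap_tte(pmap, start);
	if ((tte_p != (tt_entry_t *) NULL) &&
	    ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
		bpte = &((pt_entry_t *) ttetokv(*tte_p))[ptenum(start)];
		epte = bpte + ((end - start) >> ARM_TT_LEAF_SHIFT);
		(void) pmap_remove_range(pmap, start, bpte, epte, &rmv_cnt);
	}
	PMAP_UNLOCK(pmap);
}
#endif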
4256
4257
4258 #ifdef PVH_FLAG_EXEC
4259
4260 /*
4261 * Update the access protection bits of the physical aperture mapping for a page.
4262 * This is useful, for example, in guaranteeing that a verified executable page
4263 * has no writable mappings anywhere in the system, including the physical
4264 * aperture. flush_tlb_async can be set to true to avoid unnecessary TLB
4265 * synchronization overhead in cases where the call to this function is
4266 * guaranteed to be followed by other TLB operations.
4267 */
4268 static void
4269 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
4270 {
4271 #if __ARM_PTE_PHYSMAP__
4272 ASSERT_PVH_LOCKED(pai);
4273 vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
4274 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
4275
4276 pt_entry_t tmplate = *pte_p;
4277 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap))
4278 return;
4279 tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
4280 #if (__ARM_VMSA__ > 7)
4281 if (tmplate & ARM_PTE_HINT_MASK) {
4282 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
4283 __func__, pte_p, (void *)kva, tmplate);
4284 }
4285 #endif
4286 WRITE_PTE_STRONG(pte_p, tmplate);
4287 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
4288 if (!flush_tlb_async)
4289 sync_tlb_flush();
4290 #endif
4291 }
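#if 0
/*
 * Illustrative sketch only: the flush_tlb_async batching described above.
 * When several pages are reverted in one pass, each call defers the TLB
 * synchronization and a single sync_tlb_flush() is issued at the end;
 * pmap_page_protect_options_internal() below uses the same pattern with
 * its tlb_flush_needed flag.  The pai_list/cnt parameters are hypothetical.
 */
static void
example_restore_ptov_rw(const unsigned int *pai_list, unsigned int cnt)
{
	unsigned int i;

	for (i = 0; i < cnt; i++) {
		LOCK_PVH((int)pai_list[i]);
		pmap_set_ptov_ap(pai_list[i], AP_RWNA, TRUE);	/* defer the TLB sync */
		UNLOCK_PVH((int)pai_list[i]);
	}
	sync_tlb_flush();	/* one barrier for the whole batch */
}
#endif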
4292
4293 #endif /* defined(PVH_FLAG_EXEC) */
4294
4295 static void
4296 pmap_remove_pv(
4297 pmap_t pmap,
4298 pt_entry_t *cpte,
4299 int pai,
4300 int *num_internal,
4301 int *num_alt_internal,
4302 int *num_reusable,
4303 int *num_external)
4304 {
4305 pv_entry_t **pv_h, **pve_pp;
4306 pv_entry_t *pve_p;
4307
4308 ASSERT_PVH_LOCKED(pai);
4309 pv_h = pai_to_pvh(pai);
4310 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
4311
4312
4313 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
4314 if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0))
4315 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
4316 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
4317 assert(IS_INTERNAL_PAGE(pai));
4318 (*num_internal)++;
4319 (*num_alt_internal)++;
4320 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
4321 } else if (IS_INTERNAL_PAGE(pai)) {
4322 if (IS_REUSABLE_PAGE(pai)) {
4323 (*num_reusable)++;
4324 } else {
4325 (*num_internal)++;
4326 }
4327 } else {
4328 (*num_external)++;
4329 }
4330 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
4331 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
4332
4333 pve_pp = pv_h;
4334 pve_p = pvh_list(pv_h);
4335
4336 while (pve_p != PV_ENTRY_NULL &&
4337 (pve_get_ptep(pve_p) != cpte)) {
4338 pve_pp = pve_link_field(pve_p);
4339 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4340 }
4341
4342 if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0))
4343 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
4344
4345 #if MACH_ASSERT
4346 if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
4347 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4348 while (check_pve_p != PV_ENTRY_NULL) {
4349 if (pve_get_ptep(check_pve_p) == cpte) {
4350 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
4351 __func__, cpte, pmap, pv_h, pve_p, pai);
4352 }
4353 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
4354 }
4355 }
4356 #endif
4357
4358 if (IS_ALTACCT_PAGE(pai, pve_p)) {
4359 assert(IS_INTERNAL_PAGE(pai));
4360 (*num_internal)++;
4361 (*num_alt_internal)++;
4362 CLR_ALTACCT_PAGE(pai, pve_p);
4363 } else if (IS_INTERNAL_PAGE(pai)) {
4364 if (IS_REUSABLE_PAGE(pai)) {
4365 (*num_reusable)++;
4366 } else {
4367 (*num_internal)++;
4368 }
4369 } else {
4370 (*num_external)++;
4371 }
4372
4373 pvh_remove(pv_h, pve_pp, pve_p);
4374 pv_free(pve_p);
4375 if (!pvh_test_type(pv_h, PVH_TYPE_NULL))
4376 pvh_set_flags(pv_h, pvh_flags);
4377 } else {
4378 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
4379 __func__, *pv_h, cpte, pmap, pv_h, pai);
4380 }
4381
4382 #ifdef PVH_FLAG_EXEC
4383 if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL))
4384 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
4385 #endif
4386 }
4387
4388 static int
4389 pmap_remove_range_options(
4390 pmap_t pmap,
4391 vm_map_address_t va,
4392 pt_entry_t *bpte,
4393 pt_entry_t *epte,
4394 uint32_t *rmv_cnt,
4395 int options)
4396 {
4397 pt_entry_t *cpte;
4398 int num_removed, num_unwired;
4399 int num_pte_changed;
4400 int pai = 0;
4401 pmap_paddr_t pa;
4402 int num_external, num_internal, num_reusable;
4403 int num_alt_internal;
4404 uint64_t num_compressed, num_alt_compressed;
4405
4406 PMAP_ASSERT_LOCKED(pmap);
4407
4408 num_removed = 0;
4409 num_unwired = 0;
4410 num_pte_changed = 0;
4411 num_external = 0;
4412 num_internal = 0;
4413 num_reusable = 0;
4414 num_compressed = 0;
4415 num_alt_internal = 0;
4416 num_alt_compressed = 0;
4417
4418 for (cpte = bpte; cpte < epte;
4419 cpte += PAGE_SIZE/ARM_PGBYTES, va += PAGE_SIZE) {
4420 pt_entry_t spte;
4421 boolean_t managed=FALSE;
4422
4423 spte = *cpte;
4424
4425 #if CONFIG_PGTRACE
4426 if (pgtrace_enabled)
4427 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
4428 #endif
4429
4430 while (!managed) {
4431 if (pmap != kernel_pmap &&
4432 (options & PMAP_OPTIONS_REMOVE) &&
4433 (ARM_PTE_IS_COMPRESSED(spte))) {
4434 /*
4435 * "pmap" must be locked at this point,
4436 * so this should not race with another
4437 * pmap_remove_range() or pmap_enter().
4438 */
4439
4440 /* one less "compressed"... */
4441 num_compressed++;
4442 if (spte & ARM_PTE_COMPRESSED_ALT) {
4443 /* ... but it used to be "ALTACCT" */
4444 num_alt_compressed++;
4445 }
4446
4447 /* clear marker */
4448 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4449 /*
4450 * "refcnt" also accounts for
4451 * our "compressed" markers,
4452 * so let's update it here.
4453 */
4454 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0)
4455 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4456 spte = *cpte;
4457 }
4458 /*
4459 * It may be possible for the pte to transition from managed
4460 * to unmanaged in this timeframe; for now, elide the assert.
4461 * We should break out as a consequence of checking pa_valid.
4462 */
4463 //assert(!ARM_PTE_IS_COMPRESSED(spte));
4464 pa = pte_to_pa(spte);
4465 if (!pa_valid(pa)) {
4466 break;
4467 }
4468 pai = (int)pa_index(pa);
4469 LOCK_PVH(pai);
4470 spte = *cpte;
4471 pa = pte_to_pa(spte);
4472 if (pai == (int)pa_index(pa)) {
4473 managed =TRUE;
4474 break; // Leave pai locked as we will unlock it after we free the PV entry
4475 }
4476 UNLOCK_PVH(pai);
4477 }
4478
4479 if (ARM_PTE_IS_COMPRESSED(*cpte)) {
4480 /*
4481 * There used to be a valid mapping here but it
4482 * has already been removed when the page was
4483 * sent to the VM compressor, so nothing left to
4484 * remove now...
4485 */
4486 continue;
4487 }
4488
4489 /* remove the translation, do not flush the TLB */
4490 if (*cpte != ARM_PTE_TYPE_FAULT) {
4491 assert(!ARM_PTE_IS_COMPRESSED(*cpte));
4492 #if MACH_ASSERT
4493 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
4494 panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
4495 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
4496 }
4497 #endif
4498 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4499 num_pte_changed++;
4500 }
4501
4502 if ((spte != ARM_PTE_TYPE_FAULT) &&
4503 (pmap != kernel_pmap)) {
4504 assert(!ARM_PTE_IS_COMPRESSED(spte));
4505 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0)
4506 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4507 if(rmv_cnt) (*rmv_cnt)++;
4508 }
4509
4510 if (pte_is_wired(spte)) {
4511 pte_set_wired(cpte, 0);
4512 num_unwired++;
4513 }
4514 /*
4515 * if not managed, we're done
4516 */
4517 if (!managed)
4518 continue;
4519 /*
4520 * find and remove the mapping from the chain for this
4521 * physical address.
4522 */
4523
4524 pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);
4525
4526 UNLOCK_PVH(pai);
4527 num_removed++;
4528 }
4529
4530 /*
4531 * Update the counts
4532 */
4533 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
4534 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
4535
4536 if (pmap != kernel_pmap) {
4537 /* sanity checks... */
4538 #if MACH_ASSERT
4539 if (pmap->stats.internal < num_internal) {
4540 if ((! pmap_stats_assert ||
4541 ! pmap->pmap_stats_assert)) {
4542 printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
4543 pmap->pmap_pid,
4544 pmap->pmap_procname,
4545 pmap,
4546 (uint64_t) va,
4547 bpte,
4548 epte,
4549 options,
4550 num_internal,
4551 num_removed,
4552 num_unwired,
4553 num_external,
4554 num_reusable,
4555 num_compressed,
4556 num_alt_internal,
4557 num_alt_compressed,
4558 num_pte_changed,
4559 pmap->stats.internal,
4560 pmap->stats.reusable);
4561 } else {
4562 panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
4563 pmap->pmap_pid,
4564 pmap->pmap_procname,
4565 pmap,
4566 (uint64_t) va,
4567 bpte,
4568 epte,
4569 options,
4570 num_internal,
4571 num_removed,
4572 num_unwired,
4573 num_external,
4574 num_reusable,
4575 num_compressed,
4576 num_alt_internal,
4577 num_alt_compressed,
4578 num_pte_changed,
4579 pmap->stats.internal,
4580 pmap->stats.reusable);
4581 }
4582 }
4583 #endif /* MACH_ASSERT */
4584 PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
4585 pmap,
4586 "pmap=%p num_external=%d stats.external=%d",
4587 pmap, num_external, pmap->stats.external);
4588 PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
4589 pmap,
4590 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4591 pmap,
4592 num_internal, pmap->stats.internal,
4593 num_reusable, pmap->stats.reusable);
4594 PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
4595 pmap,
4596 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4597 pmap,
4598 num_internal, pmap->stats.internal,
4599 num_reusable, pmap->stats.reusable);
4600 PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
4601 pmap,
4602 "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
4603 pmap, num_compressed, num_alt_compressed,
4604 pmap->stats.compressed);
4605
4606 /* update pmap stats... */
4607 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
4608 if (num_external)
4609 OSAddAtomic(-num_external, &pmap->stats.external);
4610 if (num_internal)
4611 OSAddAtomic(-num_internal, &pmap->stats.internal);
4612 if (num_reusable)
4613 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
4614 if (num_compressed)
4615 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
4616 /* ... and ledgers */
4617 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
4618 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
4619 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
4620 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
4621 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
4622 /* make needed adjustments to phys_footprint */
4623 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
4624 machine_ptob((num_internal -
4625 num_alt_internal) +
4626 (num_compressed -
4627 num_alt_compressed)));
4628 }
4629
4630 /* flush the ptable entries we have written */
4631 if (num_pte_changed > 0)
4632 FLUSH_PTE_RANGE_STRONG(bpte, epte);
4633
4634 return num_pte_changed;
4635 }
4636
4637
4638 /*
4639 * Remove the given range of addresses
4640 * from the specified map.
4641 *
4642 * It is assumed that the start and end are properly
4643 * rounded to the hardware page size.
4644 */
4645 void
4646 pmap_remove(
4647 pmap_t pmap,
4648 vm_map_address_t start,
4649 vm_map_address_t end)
4650 {
4651 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
4652 }
4653
4654 MARK_AS_PMAP_TEXT static int
4655 pmap_remove_options_internal(
4656 pmap_t pmap,
4657 vm_map_address_t start,
4658 vm_map_address_t end,
4659 int options)
4660 {
4661 int remove_count = 0;
4662 pt_entry_t *bpte, *epte;
4663 pt_entry_t *pte_p;
4664 tt_entry_t *tte_p;
4665 uint32_t rmv_spte=0;
4666
4667 if (__improbable(end < start))
4668 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
4669
4670 VALIDATE_PMAP(pmap);
4671 PMAP_LOCK(pmap);
4672
4673 tte_p = pmap_tte(pmap, start);
4674
4675 if (tte_p == (tt_entry_t *) NULL) {
4676 goto done;
4677 }
4678
4679 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4680 pte_p = (pt_entry_t *) ttetokv(*tte_p);
4681 bpte = &pte_p[ptenum(start)];
4682 epte = bpte + ((end - start) >> ARM_TT_LEAF_SHIFT);
4683
4684 remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
4685 &rmv_spte, options);
4686
4687 #if (__ARM_VMSA__ == 7)
4688 if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
4689 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
4690 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
4691 flush_mmu_tlb_entry((start & ~ARM_TT_L1_OFFMASK) | (pmap->asid & 0xff));
4692 }
4693 #else
4694 if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
4695 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
4696 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
4697 flush_mmu_tlb_entry(tlbi_addr(start & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
4698 }
4699 #endif
4700 }
4701
4702 done:
4703 PMAP_UNLOCK(pmap);
4704 return remove_count;
4705 }
4706
4707 void
4708 pmap_remove_options(
4709 pmap_t pmap,
4710 vm_map_address_t start,
4711 vm_map_address_t end,
4712 int options)
4713 {
4714 int remove_count = 0;
4715 vm_map_address_t va;
4716
4717 if (pmap == PMAP_NULL)
4718 return;
4719
4720 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
4721 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
4722 VM_KERNEL_ADDRHIDE(end));
4723
4724 #if MACH_ASSERT
4725 if ((start|end) & PAGE_MASK) {
4726 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
4727 pmap, (uint64_t)start, (uint64_t)end);
4728 }
4729 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
4730 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
4731 pmap, (uint64_t)start, (uint64_t)end);
4732 }
4733 #endif
4734
4735 /*
4736 * Remove the range one twig-sized chunk at a time; the TLB flush is batched via PMAP_UPDATE_TLBS() below.
4737 */
4738 va = start;
4739 while (va < end) {
4740 vm_map_address_t l;
4741
4742 #if (__ARM_VMSA__ == 7)
4743 l = ((va + ARM_TT_L1_SIZE) & ~ARM_TT_L1_OFFMASK);
4744 #else
4745 l = ((va + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK);
4746 #endif
4747 if (l > end)
4748 l = end;
4749
4750 remove_count += pmap_remove_options_internal(pmap, va, l, options);
4751
4752 va = l;
4753 }
4754
4755 if (remove_count > 0)
4756 PMAP_UPDATE_TLBS(pmap, start, end);
4757
4758 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
4759 }
4760
4761
4762 /*
4763 * Remove phys addr if mapped in specified map
4764 */
4765 void
4766 pmap_remove_some_phys(
4767 __unused pmap_t map,
4768 __unused ppnum_t pn)
4769 {
4770 /* Implement to support working set code */
4771 }
4772
4773 void
4774 pmap_set_pmap(
4775 pmap_t pmap,
4776 #if !__ARM_USER_PROTECT__
4777 __unused
4778 #endif
4779 thread_t thread)
4780 {
4781 pmap_switch(pmap);
4782 #if __ARM_USER_PROTECT__
4783 if (pmap->tte_index_max == NTTES) {
4784 thread->machine.uptw_ttc = 2;
4785 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
4786 } else {
4787 thread->machine.uptw_ttc = 1;
4788 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
4789 }
4790 thread->machine.asid = pmap->asid;
4791 #endif
4792 }
4793
4794 static void
4795 pmap_flush_core_tlb_asid(pmap_t pmap)
4796 {
4797 #if (__ARM_VMSA__ == 7)
4798 flush_core_tlb_asid(pmap->asid);
4799 #else
4800 flush_core_tlb_asid(((uint64_t) pmap->asid) << TLBI_ASID_SHIFT);
4801 #endif
4802 }
4803
4804 MARK_AS_PMAP_TEXT static void
4805 pmap_switch_internal(
4806 pmap_t pmap)
4807 {
4808 VALIDATE_PMAP(pmap);
4809 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
4810 uint32_t last_asid_high_bits, asid_high_bits;
4811 boolean_t do_asid_flush = FALSE;
4812
4813 #if (__ARM_VMSA__ == 7)
4814 if (not_in_kdp)
4815 pmap_simple_lock(&pmap->tt1_lock);
4816 #else
4817 pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
4818 #endif
4819
4820 /* Paranoia. */
4821 assert(pmap->asid < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));
4822
4823 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
4824 asid_high_bits = pmap->vasid >> ARM_ASID_SHIFT;
4825 last_asid_high_bits = (uint32_t) cpu_data_ptr->cpu_asid_high_bits[pmap->asid];
4826
4827 if (asid_high_bits != last_asid_high_bits) {
4828 /*
4829 * If the virtual ASID of the new pmap does not match the virtual ASID
4830 * last seen on this CPU for the physical ASID (that was a mouthful),
4831 * then this switch runs the risk of aliasing. We need to flush the
4832 * TLB for this physical ASID in this case.
4833 */
4834 cpu_data_ptr->cpu_asid_high_bits[pmap->asid] = (uint8_t) asid_high_bits;
4835 do_asid_flush = TRUE;
4836 }
4837
4838 pmap_switch_user_ttb_internal(pmap);
4839
4840 #if (__ARM_VMSA__ > 7)
4841 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
4842 * to flush the userspace mappings for that region. Those mappings are global
4843 * and will not be protected by the ASID. It should also be cheaper to flush the
4844 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
4845 if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap))
4846 flush_core_tlb();
4847 else
4848 #endif
4849 if (do_asid_flush)
4850 pmap_flush_core_tlb_asid(pmap);
4851
4852 #if (__ARM_VMSA__ == 7)
4853 if (not_in_kdp)
4854 pmap_simple_unlock(&pmap->tt1_lock);
4855 #endif
4856 }
4857
4858 void
4859 pmap_switch(
4860 pmap_t pmap)
4861 {
4862 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
4863 pmap_switch_internal(pmap);
4864 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
4865 }
4866
4867 void
4868 pmap_page_protect(
4869 ppnum_t ppnum,
4870 vm_prot_t prot)
4871 {
4872 pmap_page_protect_options(ppnum, prot, 0, NULL);
4873 }
4874
4875 /*
4876 * Routine: pmap_page_protect_options
4877 *
4878 * Function:
4879 * Lower the permission for all mappings to a given
4880 * page.
4881 */
4882 MARK_AS_PMAP_TEXT static void
4883 pmap_page_protect_options_internal(
4884 ppnum_t ppnum,
4885 vm_prot_t prot,
4886 unsigned int options)
4887 {
4888 pmap_paddr_t phys = ptoa(ppnum);
4889 pv_entry_t **pv_h;
4890 pv_entry_t **pve_pp;
4891 pv_entry_t *pve_p;
4892 pv_entry_t *pveh_p;
4893 pv_entry_t *pvet_p;
4894 pt_entry_t *pte_p;
4895 pv_entry_t *new_pve_p;
4896 pt_entry_t *new_pte_p;
4897 vm_offset_t pvh_flags;
4898 int pai;
4899 boolean_t remove;
4900 boolean_t set_NX;
4901 boolean_t tlb_flush_needed = FALSE;
4902 unsigned int pvh_cnt = 0;
4903
4904 assert(ppnum != vm_page_fictitious_addr);
4905
4906 /* Only work with managed pages. */
4907 if (!pa_valid(phys)) {
4908 return;
4909 }
4910
4911 /*
4912 * Determine the new protection.
4913 */
4914 switch (prot) {
4915 case VM_PROT_ALL:
4916 return; /* nothing to do */
4917 case VM_PROT_READ:
4918 case VM_PROT_READ | VM_PROT_EXECUTE:
4919 remove = FALSE;
4920 break;
4921 default:
4922 remove = TRUE;
4923 break;
4924 }
4925
4926 pai = (int)pa_index(phys);
4927 LOCK_PVH(pai);
4928 pv_h = pai_to_pvh(pai);
4929 pvh_flags = pvh_get_flags(pv_h);
4930
4931
4932 pte_p = PT_ENTRY_NULL;
4933 pve_p = PV_ENTRY_NULL;
4934 pve_pp = pv_h;
4935 pveh_p = PV_ENTRY_NULL;
4936 pvet_p = PV_ENTRY_NULL;
4937 new_pve_p = PV_ENTRY_NULL;
4938 new_pte_p = PT_ENTRY_NULL;
4939 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
4940 pte_p = pvh_ptep(pv_h);
4941 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
4942 pve_p = pvh_list(pv_h);
4943 pveh_p = pve_p;
4944 }
4945
4946 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
4947 vm_map_address_t va;
4948 pmap_t pmap;
4949 pt_entry_t tmplate;
4950 boolean_t update = FALSE;
4951
4952 if (pve_p != PV_ENTRY_NULL)
4953 pte_p = pve_get_ptep(pve_p);
4954
4955 #ifdef PVH_FLAG_IOMMU
4956 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
4957 if (remove) {
4958 if (options & PMAP_OPTIONS_COMPRESSOR) {
4959 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
4960 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
4961 }
4962 if (pve_p != PV_ENTRY_NULL) {
4963 pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4964 pvh_remove(pv_h, pve_pp, pve_p);
4965 pveh_p = pvh_list(pv_h);
4966 pve_next(pve_p) = new_pve_p;
4967 new_pve_p = pve_p;
4968 pve_p = temp_pve_p;
4969 continue;
4970 } else {
4971 new_pte_p = pte_p;
4972 break;
4973 }
4974 }
4975 goto protect_skip_pve;
4976 }
4977 #endif
4978 pmap = ptep_get_pmap(pte_p);
4979 va = ptep_get_va(pte_p);
4980
4981 if (pte_p == PT_ENTRY_NULL) {
4982 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
4983 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
4984 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
4985 #if MACH_ASSERT
4986 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
4987
4988 pv_entry_t *check_pve_p = pveh_p;
4989 while (check_pve_p != PV_ENTRY_NULL) {
4990 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
4991 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
4992 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
4993 }
4994 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
4995 }
4996 }
4997 #endif
4998 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
4999 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
5000 }
5001
5002 #if DEVELOPMENT || DEBUG
5003 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5004 #else
5005 if ((prot & VM_PROT_EXECUTE))
5006 #endif
5007 set_NX = FALSE;
5008 else
5009 set_NX = TRUE;
5010
5011 /* Remove the mapping if new protection is NONE */
5012 if (remove) {
5013 boolean_t is_altacct = FALSE;
5014
5015 if (IS_ALTACCT_PAGE(pai, pve_p)) {
5016 is_altacct = TRUE;
5017 } else {
5018 is_altacct = FALSE;
5019 }
5020
5021 if (pte_is_wired(*pte_p)) {
5022 pte_set_wired(pte_p, 0);
5023 if (pmap != kernel_pmap) {
5024 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5025 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
5026 }
5027 }
5028
5029 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5030 pmap != kernel_pmap &&
5031 (options & PMAP_OPTIONS_COMPRESSOR) &&
5032 IS_INTERNAL_PAGE(pai)) {
5033 assert(!ARM_PTE_IS_COMPRESSED(*pte_p));
5034 /* mark this PTE as having been "compressed" */
5035 tmplate = ARM_PTE_COMPRESSED;
5036 if (is_altacct) {
5037 tmplate |= ARM_PTE_COMPRESSED_ALT;
5038 is_altacct = TRUE;
5039 }
5040 } else {
5041 tmplate = ARM_PTE_TYPE_FAULT;
5042 }
5043
5044 if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
5045 tmplate == ARM_PTE_TYPE_FAULT &&
5046 (pmap != kernel_pmap)) {
5047 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0)
5048 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
5049 }
5050
5051 if (*pte_p != tmplate) {
5052 WRITE_PTE_STRONG(pte_p, tmplate);
5053 update = TRUE;
5054 }
5055 pvh_cnt++;
5056 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
5057 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
5058
5059 #if MACH_ASSERT
5060 /*
5061 * We only ever compress internal pages.
5062 */
5063 if (options & PMAP_OPTIONS_COMPRESSOR) {
5064 assert(IS_INTERNAL_PAGE(pai));
5065 }
5066 #endif
5067
5068 if (pmap != kernel_pmap) {
5069 if (IS_REUSABLE_PAGE(pai) &&
5070 IS_INTERNAL_PAGE(pai) &&
5071 !is_altacct) {
5072 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
5073 OSAddAtomic(-1, &pmap->stats.reusable);
5074 } else if (IS_INTERNAL_PAGE(pai)) {
5075 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
5076 OSAddAtomic(-1, &pmap->stats.internal);
5077 } else {
5078 PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
5079 OSAddAtomic(-1, &pmap->stats.external);
5080 }
5081 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
5082 IS_INTERNAL_PAGE(pai)) {
5083 /* adjust "compressed" stats */
5084 OSAddAtomic64(+1, &pmap->stats.compressed);
5085 PMAP_STATS_PEAK(pmap->stats.compressed);
5086 pmap->stats.compressed_lifetime++;
5087 }
5088
5089 if (IS_ALTACCT_PAGE(pai, pve_p)) {
5090 assert(IS_INTERNAL_PAGE(pai));
5091 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
5092 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
5093 if (options & PMAP_OPTIONS_COMPRESSOR) {
5094 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5095 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
5096 }
5097
5098 /*
5099 * Cleanup our marker before
5100 * we free this pv_entry.
5101 */
5102 CLR_ALTACCT_PAGE(pai, pve_p);
5103
5104 } else if (IS_REUSABLE_PAGE(pai)) {
5105 assert(IS_INTERNAL_PAGE(pai));
5106 if (options & PMAP_OPTIONS_COMPRESSOR) {
5107 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5108 /* was not in footprint, but is now */
5109 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5110 }
5111
5112 } else if (IS_INTERNAL_PAGE(pai)) {
5113 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
5114
5115 /*
5116 * Update all stats related to physical footprint, which only
5117 * deals with internal pages.
5118 */
5119 if (options & PMAP_OPTIONS_COMPRESSOR) {
5120 /*
5121 * This removal is only being done so we can send this page to
5122 * the compressor; therefore it mustn't affect total task footprint.
5123 */
5124 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5125 } else {
5126 /*
5127 * This internal page isn't going to the compressor, so adjust stats to keep
5128 * phys_footprint up to date.
5129 */
5130 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5131 }
5132 } else {
5133 /* external page: no impact on ledgers */
5134 }
5135 }
5136
5137 if (pve_p != PV_ENTRY_NULL) {
5138 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
5139 }
5140
5141 } else {
5142 pt_entry_t spte;
5143
5144 spte = *pte_p;
5145
5146 if (pmap == kernel_pmap)
5147 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5148 else
5149 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
5150
5151 pte_set_ffr(tmplate, 0);
5152
5153 #if (__ARM_VMSA__ == 7)
5154 if (set_NX) {
5155 tmplate |= ARM_PTE_NX;
5156 } else {
5157 /*
5158 * While the naive implementation of this would serve to add execute
5159 * permission, this is not how the VM uses this interface, or how
5160 * x86_64 implements it. So ignore requests to add execute permissions.
5161 */
5162 #if 0
5163 tmplate &= ~ARM_PTE_NX;
5164 #else
5165 ;
5166 #endif
5167 }
5168 #else
5169 if (set_NX)
5170 tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
5171 else {
5172 /*
5173 * While the naive implementation of this would serve to add execute
5174 * permission, this is not how the VM uses this interface, or how
5175 * x86_64 implements it. So ignore requests to add execute permissions.
5176 */
5177 #if 0
5178 if (pmap == kernel_pmap) {
5179 tmplate &= ~ARM_PTE_PNX;
5180 tmplate |= ARM_PTE_NX;
5181 } else {
5182 tmplate &= ~ARM_PTE_NX;
5183 tmplate |= ARM_PTE_PNX;
5184 }
5185 #else
5186 ;
5187 #endif
5188 }
5189 #endif
5190
5191
5192 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5193 !ARM_PTE_IS_COMPRESSED(*pte_p) &&
5194 *pte_p != tmplate) {
5195 WRITE_PTE_STRONG(pte_p, tmplate);
5196 update = TRUE;
5197 }
5198 }
5199
5200 /* Invalidate TLBs for all CPUs using it */
5201 if (update) {
5202 tlb_flush_needed = TRUE;
5203 flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
5204 }
5205
5206 #ifdef PVH_FLAG_IOMMU
5207 protect_skip_pve:
5208 #endif
5209 pte_p = PT_ENTRY_NULL;
5210 pvet_p = pve_p;
5211 if (pve_p != PV_ENTRY_NULL) {
5212 if (remove) {
5213 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
5214 }
5215 pve_pp = pve_link_field(pve_p);
5216 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
5217 }
5218 }
5219
5220 #ifdef PVH_FLAG_EXEC
5221 if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC))
5222 pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
5223 #endif
5224 if (tlb_flush_needed)
5225 sync_tlb_flush();
5226
5227 /* if we removed a bunch of entries, take care of them now */
5228 if (remove) {
5229 if (new_pve_p != PV_ENTRY_NULL) {
5230 pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
5231 pvh_set_flags(pv_h, pvh_flags);
5232 } else if (new_pte_p != PT_ENTRY_NULL) {
5233 pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
5234 pvh_set_flags(pv_h, pvh_flags);
5235 } else {
5236 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
5237 }
5238 }
5239
5240 UNLOCK_PVH(pai);
5241
5242 if (remove && (pvet_p != PV_ENTRY_NULL)) {
5243 pv_list_free(pveh_p, pvet_p, pvh_cnt);
5244 }
5245 }
5246
5247 void
5248 pmap_page_protect_options(
5249 ppnum_t ppnum,
5250 vm_prot_t prot,
5251 unsigned int options,
5252 __unused void *arg)
5253 {
5254 pmap_paddr_t phys = ptoa(ppnum);
5255
5256 assert(ppnum != vm_page_fictitious_addr);
5257
5258 /* Only work with managed pages. */
5259 if (!pa_valid(phys))
5260 return;
5261
5262 /*
5263 * Determine the new protection.
5264 */
5265 if (prot == VM_PROT_ALL) {
5266 return; /* nothing to do */
5267 }
5268
5269 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
5270
5271 pmap_page_protect_options_internal(ppnum, prot, options);
5272
5273 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
5274 }
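#if 0
/*
 * Illustrative sketch only: the two typical uses of this interface.  With
 * VM_PROT_READ every mapping of the page is downgraded to read-only; with
 * VM_PROT_NONE the "remove" path in pmap_page_protect_options_internal()
 * above tears every mapping down.
 */
static void
example_page_protect(ppnum_t pn)
{
	pmap_page_protect(pn, VM_PROT_READ);	/* write-protect all mappings */
	pmap_page_protect(pn, VM_PROT_NONE);	/* remove all mappings */
}
#endif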
5275
5276 /*
5277 * Indicates if the pmap layer enforces some additional restrictions on the
5278 * given set of protections.
5279 */
5280 bool pmap_has_prot_policy(__unused vm_prot_t prot)
5281 {
5282 return FALSE;
5283 }
5284
5285 /*
5286 * Set the physical protection on the
5287 * specified range of this map as requested.
5288 * VERY IMPORTANT: Will not increase permissions.
5289 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
5290 */
5291 void
5292 pmap_protect(
5293 pmap_t pmap,
5294 vm_map_address_t b,
5295 vm_map_address_t e,
5296 vm_prot_t prot)
5297 {
5298 pmap_protect_options(pmap, b, e, prot, 0, NULL);
5299 }
5300
5301 MARK_AS_PMAP_TEXT static void
5302 pmap_protect_options_internal(pmap_t pmap,
5303 vm_map_address_t start,
5304 vm_map_address_t end,
5305 vm_prot_t prot,
5306 unsigned int options,
5307 __unused void *args)
5308 {
5309 tt_entry_t *tte_p;
5310 pt_entry_t *bpte_p, *epte_p;
5311 pt_entry_t *pte_p;
5312 boolean_t set_NX = TRUE;
5313 #if (__ARM_VMSA__ > 7)
5314 boolean_t set_XO = FALSE;
5315 #endif
5316 boolean_t should_have_removed = FALSE;
5317
5318 #ifndef __ARM_IC_NOALIAS_ICACHE__
5319 boolean_t InvalidatePoU_Icache_Done = FALSE;
5320 #endif
5321
5322 if (__improbable(end < start))
5323 panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
5324
5325 #if DEVELOPMENT || DEBUG
5326 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5327 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5328 should_have_removed = TRUE;
5329 }
5330 } else
5331 #endif
5332 {
5333 /* Determine the new protection. */
5334 switch (prot) {
5335 #if (__ARM_VMSA__ > 7)
5336 case VM_PROT_EXECUTE:
5337 set_XO = TRUE;
5338 /* fall through */
5339 #endif
5340 case VM_PROT_READ:
5341 case VM_PROT_READ | VM_PROT_EXECUTE:
5342 break;
5343 case VM_PROT_READ | VM_PROT_WRITE:
5344 case VM_PROT_ALL:
5345 return; /* nothing to do */
5346 default:
5347 should_have_removed = TRUE;
5348 }
5349 }
5350
5351 if (should_have_removed) {
5352 panic("%s: should have been a remove operation, "
5353 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
5354 __FUNCTION__,
5355 pmap, (void *)start, (void *)end, prot, options, args);
5356 }
5357
5358 #if DEVELOPMENT || DEBUG
5359 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5360 #else
5361 if ((prot & VM_PROT_EXECUTE))
5362 #endif
5363 {
5364 set_NX = FALSE;
5365 } else {
5366 set_NX = TRUE;
5367 }
5368
5369 VALIDATE_PMAP(pmap);
5370 PMAP_LOCK(pmap);
5371 tte_p = pmap_tte(pmap, start);
5372
5373 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5374 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
5375 bpte_p = &bpte_p[ptenum(start)];
5376 epte_p = bpte_p + arm_atop(end - start);
5377 pte_p = bpte_p;
5378
5379 for (pte_p = bpte_p;
5380 pte_p < epte_p;
5381 pte_p += PAGE_SIZE/ARM_PGBYTES) {
5382 pt_entry_t spte;
5383 #if DEVELOPMENT || DEBUG
5384 boolean_t force_write = FALSE;
5385 #endif
5386
5387 spte = *pte_p;
5388
5389 if ((spte == ARM_PTE_TYPE_FAULT) ||
5390 ARM_PTE_IS_COMPRESSED(spte)) {
5391 continue;
5392 }
5393
5394 pmap_paddr_t pa;
5395 int pai=0;
5396 boolean_t managed=FALSE;
5397
5398 while (!managed) {
5399 /*
5400 * It may be possible for the pte to transition from managed
5401 * to unmanaged in this timeframe; for now, elide the assert.
5402 * We should break out as a consequence of checking pa_valid.
5403 */
5404 // assert(!ARM_PTE_IS_COMPRESSED(spte));
5405 pa = pte_to_pa(spte);
5406 if (!pa_valid(pa))
5407 break;
5408 pai = (int)pa_index(pa);
5409 LOCK_PVH(pai);
5410 spte = *pte_p;
5411 pa = pte_to_pa(spte);
5412 if (pai == (int)pa_index(pa)) {
5413 managed =TRUE;
5414 break; // Leave the PVH locked as we will unlock it after we free the PTE
5415 }
5416 UNLOCK_PVH(pai);
5417 }
5418
5419 if ((spte == ARM_PTE_TYPE_FAULT) ||
5420 ARM_PTE_IS_COMPRESSED(spte)) {
5421 continue;
5422 }
5423
5424 pt_entry_t tmplate;
5425
5426 if (pmap == kernel_pmap) {
5427 #if DEVELOPMENT || DEBUG
5428 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5429 force_write = TRUE;
5430 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
5431 } else
5432 #endif
5433 {
5434 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5435 }
5436 } else {
5437 #if DEVELOPMENT || DEBUG
5438 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5439 force_write = TRUE;
5440 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWRW));
5441 } else
5442 #endif
5443 {
5444 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
5445 }
5446 }
5447
5448 /*
5449 * XXX Removing "NX" would
5450 * grant "execute" access
5451 * immediately, bypassing any
5452 * checks VM might want to do
5453 * in its soft fault path.
5454 * pmap_protect() and co. are
5455 * not allowed to increase
5456 * access permissions.
5457 */
5458 #if (__ARM_VMSA__ == 7)
5459 if (set_NX)
5460 tmplate |= ARM_PTE_NX;
5461 else {
5462 /* do NOT clear "NX"! */
5463 }
5464 #else
5465 if (set_NX)
5466 tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
5467 else {
5468 if (pmap == kernel_pmap) {
5469 /*
5470 * TODO: Run CS/Monitor checks here;
5471 * should we be clearing PNX here? Is
5472 * this just for dtrace?
5473 */
5474 tmplate &= ~ARM_PTE_PNX;
5475 tmplate |= ARM_PTE_NX;
5476 } else {
5477 /* do NOT clear "NX"! */
5478 tmplate |= ARM_PTE_PNX;
5479 if (set_XO) {
5480 tmplate &= ~ARM_PTE_APMASK;
5481 tmplate |= ARM_PTE_AP(AP_RONA);
5482 }
5483 }
5484 }
5485 #endif
5486
5487 #if DEVELOPMENT || DEBUG
5488 if (force_write) {
5489 /*
5490 * TODO: Run CS/Monitor checks here.
5491 */
5492 if (managed) {
5493 /*
5494 * We are marking the page as writable,
5495 * so we consider it to be modified and
5496 * referenced.
5497 */
5498 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
5499 tmplate |= ARM_PTE_AF;
5500
5501 if (IS_REFFAULT_PAGE(pai)) {
5502 CLR_REFFAULT_PAGE(pai);
5503 }
5504
5505 if (IS_MODFAULT_PAGE(pai)) {
5506 CLR_MODFAULT_PAGE(pai);
5507 }
5508 }
5509 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5510 /*
5511 * An immediate request for anything other than
5512 * write should still mark the page as
5513 * referenced if managed.
5514 */
5515 if (managed) {
5516 pa_set_bits(pa, PP_ATTR_REFERENCED);
5517 tmplate |= ARM_PTE_AF;
5518
5519 if (IS_REFFAULT_PAGE(pai)) {
5520 CLR_REFFAULT_PAGE(pai);
5521 }
5522 }
5523 }
5524 #endif
5525
5526 /* We do not expect to write fast fault the entry. */
5527 pte_set_ffr(tmplate, 0);
5528
5529 /* TODO: Doesn't this need to worry about PNX? */
5530 if (((spte & ARM_PTE_NX) == ARM_PTE_NX) && (prot & VM_PROT_EXECUTE)) {
5531 CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
5532 #ifdef __ARM_IC_NOALIAS_ICACHE__
5533 InvalidatePoU_IcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
5534 #else
5535 if (!InvalidatePoU_Icache_Done) {
5536 InvalidatePoU_Icache();
5537 InvalidatePoU_Icache_Done = TRUE;
5538 }
5539 #endif
5540 }
5541
5542 WRITE_PTE_FAST(pte_p, tmplate);
5543
5544 if (managed) {
5545 ASSERT_PVH_LOCKED(pai);
5546 UNLOCK_PVH(pai);
5547 }
5548 }
5549
5550 FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
5551 PMAP_UPDATE_TLBS(pmap, start, end);
5552 }
5553
5554 PMAP_UNLOCK(pmap);
5555 }
5556
5557 void
5558 pmap_protect_options(
5559 pmap_t pmap,
5560 vm_map_address_t b,
5561 vm_map_address_t e,
5562 vm_prot_t prot,
5563 unsigned int options,
5564 __unused void *args)
5565 {
5566 vm_map_address_t l, beg;
5567
5568 if ((b|e) & PAGE_MASK) {
5569 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
5570 pmap, (uint64_t)b, (uint64_t)e);
5571 }
5572
5573 #if DEVELOPMENT || DEBUG
5574 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5575 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5576 pmap_remove_options(pmap, b, e, options);
5577 return;
5578 }
5579 } else
5580 #endif
5581 {
5582 /* Determine the new protection. */
5583 switch (prot) {
5584 case VM_PROT_EXECUTE:
5585 case VM_PROT_READ:
5586 case VM_PROT_READ | VM_PROT_EXECUTE:
5587 break;
5588 case VM_PROT_READ | VM_PROT_WRITE:
5589 case VM_PROT_ALL:
5590 return; /* nothing to do */
5591 default:
5592 pmap_remove_options(pmap, b, e, options);
5593 return;
5594 }
5595 }
5596
5597 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
5598 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
5599 VM_KERNEL_ADDRHIDE(e));
5600
5601 beg = b;
5602
5603 while (beg < e) {
5604 l = ((beg + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);
5605
5606 if (l > e)
5607 l = e;
5608
5609 pmap_protect_options_internal(pmap, beg, l, prot, options, args);
5610
5611 beg = l;
5612 }
5613
5614 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
5615 }
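#if 0
/*
 * Illustrative sketch only (hypothetical range): write-protecting a
 * page-aligned range of a user pmap.  Requests that would reduce protection
 * to VM_PROT_NONE are turned into removals by pmap_protect_options() above.
 */
static void
example_write_protect(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	pmap_protect(pmap, start, end, VM_PROT_READ);
}
#endif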
5616
5617 /* Map a (possibly) autogenned block */
5618 kern_return_t
5619 pmap_map_block(
5620 pmap_t pmap,
5621 addr64_t va,
5622 ppnum_t pa,
5623 uint32_t size,
5624 vm_prot_t prot,
5625 int attr,
5626 __unused unsigned int flags)
5627 {
5628 kern_return_t kr;
5629 addr64_t original_va = va;
5630 uint32_t page;
5631
5632 for (page = 0; page < size; page++) {
5633 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
5634
5635 if (kr != KERN_SUCCESS) {
5636 /*
5637 * This will panic for now, as it is unclear that
5638 * removing the mappings is correct.
5639 */
5640 panic("%s: failed pmap_enter, "
5641 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
5642 __FUNCTION__,
5643 pmap, va, pa, size, prot, flags);
5644
5645 pmap_remove(pmap, original_va, va - original_va);
5646 return kr;
5647 }
5648
5649 va += PAGE_SIZE;
5650 pa++;
5651 }
5652
5653 return KERN_SUCCESS;
5654 }
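#if 0
/*
 * Illustrative sketch only (hypothetical addresses): mapping a 16KB device
 * register window into the kernel pmap.  "size" is a page count, and the
 * attr argument is handed to pmap_enter() as its flags, carrying the
 * VM_WIMG_* cache attribute (see wimg_to_pte() below).
 */
static void
example_map_device_window(vm_map_address_t kva, pmap_paddr_t regs_pa)
{
	(void) pmap_map_block(kernel_pmap, (addr64_t)kva, (ppnum_t)atop(regs_pa),
	    (uint32_t)atop(16 * 1024), VM_PROT_READ | VM_PROT_WRITE,
	    VM_WIMG_IO, 0);
}
#endif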
5655
5656 /*
5657 * Insert the given physical page (p) at
5658 * the specified virtual address (v) in the
5659 * target physical map with the protection requested.
5660 *
5661 * If specified, the page will be wired down, meaning
5662 * that the related pte can not be reclaimed.
5663 *
5664 * NB: This is the only routine which MAY NOT lazy-evaluate
5665 * or lose information. That is, this routine must actually
5666 * insert this page into the given map eventually (must make
5667 * forward progress).
5668 */
5669 kern_return_t
5670 pmap_enter(
5671 pmap_t pmap,
5672 vm_map_address_t v,
5673 ppnum_t pn,
5674 vm_prot_t prot,
5675 vm_prot_t fault_type,
5676 unsigned int flags,
5677 boolean_t wired)
5678 {
5679 return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
5680 }
5681
5682
5683 static inline void pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
5684 {
5685 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED)))
5686 {
5687 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
5688 if (pte & ARM_PTE_WIRED) {
5689 OSAddAtomic16(1, ptd_wiredcnt_ptr);
5690 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5691 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
5692 } else {
5693 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
5694 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5695 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
5696 }
5697 }
5698 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5699 !ARM_PTE_IS_COMPRESSED(*pte_p)) {
5700 WRITE_PTE_STRONG(pte_p, pte);
5701 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
5702 } else {
5703 WRITE_PTE(pte_p, pte);
5704 __builtin_arm_isb(ISB_SY);
5705 }
5706
5707 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
5708 }
5709
5710 static pt_entry_t
5711 wimg_to_pte(unsigned int wimg)
5712 {
5713 pt_entry_t pte;
5714
5715 switch (wimg & (VM_WIMG_MASK)) {
5716 case VM_WIMG_IO:
5717 case VM_WIMG_RT:
5718 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5719 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5720 break;
5721 case VM_WIMG_POSTED:
5722 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
5723 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5724 break;
5725 case VM_WIMG_WCOMB:
5726 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
5727 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5728 break;
5729 case VM_WIMG_WTHRU:
5730 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
5731 #if (__ARM_VMSA__ > 7)
5732 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5733 #else
5734 pte |= ARM_PTE_SH;
5735 #endif
5736 break;
5737 case VM_WIMG_COPYBACK:
5738 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
5739 #if (__ARM_VMSA__ > 7)
5740 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5741 #else
5742 pte |= ARM_PTE_SH;
5743 #endif
5744 break;
5745 case VM_WIMG_INNERWBACK:
5746 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
5747 #if (__ARM_VMSA__ > 7)
5748 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
5749 #else
5750 pte |= ARM_PTE_SH;
5751 #endif
5752 break;
5753 default:
5754 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
5755 #if (__ARM_VMSA__ > 7)
5756 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5757 #else
5758 pte |= ARM_PTE_SH;
5759 #endif
5760 }
5761
5762 return pte;
5763 }
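#if 0
/*
 * Illustrative sketch only: wimg_to_pte() supplies the memory-attribute,
 * shareability and (for device memory) execute-never bits; the caller still
 * adds the descriptor type, access-permission and access-flag bits.  The
 * pa_to_pte()/ARM_PTE_TYPE helpers are assumed from the usual proc_reg
 * headers.
 */
static pt_entry_t
example_io_pte(pmap_paddr_t pa)
{
	pt_entry_t pte = pa_to_pte(pa) | ARM_PTE_TYPE | ARM_PTE_AF;

	pte |= wimg_to_pte(VM_WIMG_IO);		/* strongly-ordered, NX/PNX */
	pte |= ARM_PTE_AP(AP_RWNA);		/* kernel read/write, no user access */
	return pte;
}
#endif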
5764
5765 static boolean_t
5766 pmap_enter_pv(
5767 pmap_t pmap,
5768 pt_entry_t *pte_p,
5769 int pai,
5770 unsigned int options,
5771 pv_entry_t **pve_p,
5772 boolean_t *is_altacct)
5773 {
5774 pv_entry_t **pv_h;
5775 pv_h = pai_to_pvh(pai);
5776 boolean_t first_cpu_mapping;
5777
5778 ASSERT_PVH_LOCKED(pai);
5779
5780 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
5781
5782
5783 #ifdef PVH_FLAG_CPU
5784 /* An IOMMU mapping may already be present for a page that hasn't yet
5785 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
5786 * if this is the first CPU mapping. We base internal/reusable
5787 * accounting on the options specified for the first CPU mapping.
5788 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
5789 * as there are *any* mappings of the page. The accounting for a
5790 * page should not need to change until the page is recycled by the
5791 * VM layer, and we assert that there are no mappings when a page
5792 * is recycled. An IOMMU mapping of a freed/recycled page is
5793 * considered a security violation & potential DMA corruption path. */
5794 first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
5795 if (first_cpu_mapping)
5796 pvh_flags |= PVH_FLAG_CPU;
5797 #else
5798 first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
5799 #endif
5800
5801 if (first_cpu_mapping) {
5802 if (options & PMAP_OPTIONS_INTERNAL) {
5803 SET_INTERNAL_PAGE(pai);
5804 } else {
5805 CLR_INTERNAL_PAGE(pai);
5806 }
5807 if ((options & PMAP_OPTIONS_INTERNAL) &&
5808 (options & PMAP_OPTIONS_REUSABLE)) {
5809 SET_REUSABLE_PAGE(pai);
5810 } else {
5811 CLR_REUSABLE_PAGE(pai);
5812 }
5813 }
5814 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
5815 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
5816 if (pmap != NULL && pmap != kernel_pmap &&
5817 ((options & PMAP_OPTIONS_ALT_ACCT) ||
5818 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
5819 IS_INTERNAL_PAGE(pai)) {
5820 /*
5821 * Make a note to ourselves that this mapping is using alternative
5822 * accounting. We'll need this in order to know which ledger to
5823 * debit when the mapping is removed.
5824 *
5825 * The altacct bit must be set while the pv head is locked. Defer
5826 * the ledger accounting until after we've dropped the lock.
5827 */
5828 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5829 *is_altacct = TRUE;
5830 } else {
5831 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5832 }
5833 } else {
5834 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
5835 pt_entry_t *pte1_p;
5836
5837 /*
5838 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
5839 */
5840 pte1_p = pvh_ptep(pv_h);
5841 pvh_set_flags(pv_h, pvh_flags);
5842 if((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p)))
5843 return FALSE;
5844
5845 pve_set_ptep(*pve_p, pte1_p);
5846 (*pve_p)->pve_next = PV_ENTRY_NULL;
5847
5848 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
5849 /*
5850 * transfer "altacct" from
5851 * pp_attr to this pve
5852 */
5853 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5854 SET_ALTACCT_PAGE(pai, *pve_p);
5855 }
5856 pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
5857 *pve_p = PV_ENTRY_NULL;
5858 } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5859 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
5860 __func__, *pv_h, pte_p, pmap, pv_h);
5861 }
5862 /*
5863 * Set up pv_entry for this new mapping and then
5864 * add it to the list for this physical page.
5865 */
5866 pvh_set_flags(pv_h, pvh_flags);
5867 if((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p)))
5868 return FALSE;
5869
5870 pve_set_ptep(*pve_p, pte_p);
5871 (*pve_p)->pve_next = PV_ENTRY_NULL;
5872
5873 pvh_add(pv_h, *pve_p);
5874
5875 if (pmap != NULL && pmap != kernel_pmap &&
5876 ((options & PMAP_OPTIONS_ALT_ACCT) ||
5877 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
5878 IS_INTERNAL_PAGE(pai)) {
5879 /*
5880 * Make a note to ourselves that this
5881 * mapping is using alternative
5882 * accounting. We'll need this in order
5883 * to know which ledger to debit when
5884 * the mapping is removed.
5885 *
5886 * The altacct bit must be set while
5887 * the pv head is locked. Defer the
5888 * ledger accounting until after we've
5889 * dropped the lock.
5890 */
5891 SET_ALTACCT_PAGE(pai, *pve_p);
5892 *is_altacct = TRUE;
5893 }
5894
5895 *pve_p = PV_ENTRY_NULL;
5896 }
5897
5898 pvh_set_flags(pv_h, pvh_flags);
5899
5900 return TRUE;
5901 }
5902
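/*
 * Guts of pmap_enter(): expand the pmap if the leaf table is missing,
 * reclaim any "compressed" marker ledger state, build the PTE template
 * (wiring, execute-never bits, access permissions, cacheability), take
 * refcnt/wiredcnt references on the page table page, link managed pages
 * into their PV list, and finally publish the PTE and update the
 * pmap's stats and ledgers.
 */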
5903 MARK_AS_PMAP_TEXT static kern_return_t
5904 pmap_enter_options_internal(
5905 pmap_t pmap,
5906 vm_map_address_t v,
5907 ppnum_t pn,
5908 vm_prot_t prot,
5909 vm_prot_t fault_type,
5910 unsigned int flags,
5911 boolean_t wired,
5912 unsigned int options)
5913 {
5914 pmap_paddr_t pa = ptoa(pn);
5915 pt_entry_t pte;
5916 pt_entry_t spte;
5917 pt_entry_t *pte_p;
5918 pv_entry_t *pve_p;
5919 boolean_t set_NX;
5920 boolean_t set_XO = FALSE;
5921 boolean_t refcnt_updated;
5922 boolean_t wiredcnt_updated;
5923 unsigned int wimg_bits;
5924 boolean_t was_compressed, was_alt_compressed;
5925 kern_return_t kr = KERN_SUCCESS;
5926
5927 VALIDATE_PMAP(pmap);
5928
5929 if ((v) & PAGE_MASK) {
5930 panic("pmap_enter_options() pmap %p v 0x%llx\n",
5931 pmap, (uint64_t)v);
5932 }
5933
5934 if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
5935 panic("pmap_enter_options(): WX request on kernel_pmap");
5936 }
5937
5938 #if DEVELOPMENT || DEBUG
5939 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5940 #else
5941 if ((prot & VM_PROT_EXECUTE))
5942 #endif
5943 set_NX = FALSE;
5944 else
5945 set_NX = TRUE;
5946
5947 #if (__ARM_VMSA__ > 7)
5948 if (prot == VM_PROT_EXECUTE) {
5949 set_XO = TRUE;
5950 }
5951 #endif
5952
5953 assert(pn != vm_page_fictitious_addr);
5954
5955 refcnt_updated = FALSE;
5956 wiredcnt_updated = FALSE;
5957 pve_p = PV_ENTRY_NULL;
5958 was_compressed = FALSE;
5959 was_alt_compressed = FALSE;
5960
5961 PMAP_LOCK(pmap);
5962
5963 /*
5964 * Expand pmap to include this pte. Assume that
5965 * pmap is always expanded to include enough hardware
5966 * pages to map one VM page.
5967 */
5968 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
5969 /* Must unlock to expand the pmap. */
5970 PMAP_UNLOCK(pmap);
5971
5972 kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
5973
5974 if (kr != KERN_SUCCESS)
5975 return kr;
5976
5977 PMAP_LOCK(pmap);
5978 }
5979
5980 if (options & PMAP_OPTIONS_NOENTER) {
5981 PMAP_UNLOCK(pmap);
5982 return KERN_SUCCESS;
5983 }
5984
5985 Pmap_enter_retry:
5986
5987 spte = *pte_p;
5988
5989 if (ARM_PTE_IS_COMPRESSED(spte)) {
5990 /*
5991 * "pmap" should be locked at this point, so this should
5992 * not race with another pmap_enter() or pmap_remove_range().
5993 */
5994 assert(pmap != kernel_pmap);
5995
5996 /* one less "compressed" */
5997 OSAddAtomic64(-1, &pmap->stats.compressed);
5998 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
5999 PAGE_SIZE);
6000
6001 was_compressed = TRUE;
6002 if (spte & ARM_PTE_COMPRESSED_ALT) {
6003 was_alt_compressed = TRUE;
6004 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
6005 } else {
6006 /* was part of the footprint */
6007 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6008 }
6009
6010 /* clear "compressed" marker */
6011 /* XXX is this necessary, since we're about to overwrite it? */
6012 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
6013 spte = ARM_PTE_TYPE_FAULT;
6014
6015 /*
6016 * We're replacing a "compressed" marker with a valid PTE,
6017 * so no change for "refcnt".
6018 */
6019 refcnt_updated = TRUE;
6020 }
6021
6022 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
6023 pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
6024 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
6025 }
6026
6027 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
6028
6029 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6030 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6031 * to be wired in nearly all cases. For VM layer functionality, the wired
6032 * count in vm_page_t is sufficient. */
6033 if (wired && pmap != kernel_pmap)
6034 pte |= ARM_PTE_WIRED;
6035
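/* Execute-never policy: ARMv7 has a single NX bit.  On ARMv8,
 * non-executable mappings get both ARM_PTE_NX and ARM_PTE_PNX, while
 * executable mappings still get the opposite privilege level's XN bit
 * (NX for kernel mappings, PNX for user mappings) so neither privilege
 * level can execute the other's text. */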
6036 #if (__ARM_VMSA__ == 7)
6037 if (set_NX)
6038 pte |= ARM_PTE_NX;
6039 #else
6040 if (set_NX)
6041 pte |= ARM_PTE_NX | ARM_PTE_PNX;
6042 else {
6043 if (pmap == kernel_pmap) {
6044 pte |= ARM_PTE_NX;
6045 } else {
6046 pte |= ARM_PTE_PNX;
6047 }
6048 }
6049 #endif
6050
6051 if (pmap == kernel_pmap) {
6052 #if __ARM_KERNEL_PROTECT__
6053 pte |= ARM_PTE_NG;
6054 #endif /* __ARM_KERNEL_PROTECT__ */
6055 if (prot & VM_PROT_WRITE) {
6056 pte |= ARM_PTE_AP(AP_RWNA);
6057 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
6058 } else {
6059 pte |= ARM_PTE_AP(AP_RONA);
6060 pa_set_bits(pa, PP_ATTR_REFERENCED);
6061 }
6062 #if (__ARM_VMSA__ == 7)
6063 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH))
6064 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
6065 #endif
6066 } else {
6067 if (!(pmap->nested)) {
6068 pte |= ARM_PTE_NG;
6069 } else if ((pmap->nested_region_asid_bitmap)
6070 && (v >= pmap->nested_region_subord_addr)
6071 && (v < (pmap->nested_region_subord_addr+pmap->nested_region_size))) {
6072
6073 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);
6074
6075 if ((pmap->nested_region_asid_bitmap)
6076 && testbit(index, (int *)pmap->nested_region_asid_bitmap))
6077 pte |= ARM_PTE_NG;
6078 }
6079 #if MACH_ASSERT
6080 if (pmap->nested_pmap != NULL) {
6081 vm_map_address_t nest_vaddr;
6082 pt_entry_t *nest_pte_p;
6083
6084 nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;
6085
6086 if ((nest_vaddr >= pmap->nested_region_subord_addr)
6087 && (nest_vaddr < (pmap->nested_region_subord_addr+pmap->nested_region_size))
6088 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
6089 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
6090 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p))
6091 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
6092 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);
6093
6094 if ((pmap->nested_pmap->nested_region_asid_bitmap)
6095 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
6096
6097 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
6098 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
6099 }
6100 }
6101
6102 }
6103 #endif
6104 if (prot & VM_PROT_WRITE) {
6105
6106 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
6107 if (fault_type & VM_PROT_WRITE) {
6108 if (set_XO)
6109 pte |= ARM_PTE_AP(AP_RWNA);
6110 else
6111 pte |= ARM_PTE_AP(AP_RWRW);
6112 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
6113 } else {
6114 if (set_XO)
6115 pte |= ARM_PTE_AP(AP_RONA);
6116 else
6117 pte |= ARM_PTE_AP(AP_RORO);
6118 pa_set_bits(pa, PP_ATTR_REFERENCED);
6119 pte_set_ffr(pte, 1);
6120 }
6121 } else {
6122 if (set_XO)
6123 pte |= ARM_PTE_AP(AP_RWNA);
6124 else
6125 pte |= ARM_PTE_AP(AP_RWRW);
6126 pa_set_bits(pa, PP_ATTR_REFERENCED);
6127 }
6128 } else {
6129
6130 if (set_XO)
6131 pte |= ARM_PTE_AP(AP_RONA);
6132 else
6133 pte |= ARM_PTE_AP(AP_RORO);
6134 pa_set_bits(pa, PP_ATTR_REFERENCED);
6135 }
6136 }
6137
6138 pte |= ARM_PTE_AF;
6139
6140 volatile uint16_t *refcnt = NULL;
6141 volatile uint16_t *wiredcnt = NULL;
6142 if (pmap != kernel_pmap) {
6143 refcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
6144 wiredcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
6145 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
6146 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
6147 * a new PV entry. */
6148 if (!wiredcnt_updated) {
6149 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
6150 wiredcnt_updated = TRUE;
6151 }
6152 if (!refcnt_updated) {
6153 OSAddAtomic16(1, (volatile int16_t*)refcnt);
6154 refcnt_updated = TRUE;
6155 }
6156 }
6157
6158 if (pa_valid(pa)) {
6159 int pai;
6160 boolean_t is_altacct, is_internal;
6161
6162 is_internal = FALSE;
6163 is_altacct = FALSE;
6164
6165 pai = (int)pa_index(pa);
6166
6167 LOCK_PVH(pai);
6168
6169 Pmap_enter_loop:
6170 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT)))
6171 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6172 else
6173 wimg_bits = pmap_cache_attributes(pn);
6174
6175 /* We may be retrying this operation after dropping the PVH lock.
6176 * Cache attributes for the physical page may have changed while the lock
6177 * was dropped, so clear any cache attributes we may have previously set
6178 * in the PTE template. */
6179 pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
6180 pte |= wimg_to_pte(wimg_bits);
6181
6182
6183
6184 if (pte == *pte_p) {
6185 /*
6186 * This pmap_enter operation has already been completed by another
6187 * thread; undo the refcnt taken on the page table and return.
6188 */
6189 UNLOCK_PVH(pai);
6190 goto Pmap_enter_cleanup;
6191 } else if (pte_to_pa(*pte_p) == pa) {
6192 pmap_enter_pte(pmap, pte_p, pte, v);
6193 UNLOCK_PVH(pai);
6194 goto Pmap_enter_cleanup;
6195 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
6196 /*
6197 * The pte has been modified by another thread; keep the refcnt
6198 * on the page table and retry the pmap_enter operation.
6199 */
6200 UNLOCK_PVH(pai);
6201 goto Pmap_enter_retry;
6202 }
6203 if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct))
6204 goto Pmap_enter_loop;
6205
6206 pmap_enter_pte(pmap, pte_p, pte, v);
6207
6208 if (pmap != kernel_pmap) {
6209 if (IS_REUSABLE_PAGE(pai) &&
6210 !is_altacct) {
6211 assert(IS_INTERNAL_PAGE(pai));
6212 OSAddAtomic(+1, &pmap->stats.reusable);
6213 PMAP_STATS_PEAK(pmap->stats.reusable);
6214 } else if (IS_INTERNAL_PAGE(pai)) {
6215 OSAddAtomic(+1, &pmap->stats.internal);
6216 PMAP_STATS_PEAK(pmap->stats.internal);
6217 is_internal = TRUE;
6218 } else {
6219 OSAddAtomic(+1, &pmap->stats.external);
6220 PMAP_STATS_PEAK(pmap->stats.external);
6221 }
6222 }
6223
6224 UNLOCK_PVH(pai);
6225
6226 if (pmap != kernel_pmap) {
6227 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
6228
6229 if (is_internal) {
6230 /*
6231 * Make corresponding adjustments to
6232 * phys_footprint statistics.
6233 */
6234 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
6235 if (is_altacct) {
6236 /*
6237 * If this page is internal and
6238 * in an IOKit region, credit
6239 * the task's total count of
6240 * dirty, internal IOKit pages.
6241 * It should *not* count towards
6242 * the task's total physical
6243 * memory footprint, because
6244 * this entire region was
6245 * already billed to the task
6246 * at the time the mapping was
6247 * created.
6248 *
6249 * Put another way, this is
6250 * internal++ and
6251 * alternate_accounting++, so
6252 * net effect on phys_footprint
6253 * is 0. That means: don't
6254 * touch phys_footprint here.
6255 */
6256 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
6257 } else {
6258 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6259 }
6260 }
6261 }
6262
6263 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
6264 if (pmap->stats.resident_count > pmap->stats.resident_max)
6265 pmap->stats.resident_max = pmap->stats.resident_count;
6266 } else {
6267
6268 if (prot & VM_PROT_EXECUTE) {
6269 kr = KERN_FAILURE;
6270 goto Pmap_enter_cleanup;
6271 }
6272
6273 wimg_bits = pmap_cache_attributes(pn);
6274 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT)))
6275 wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6276
6277 pte |= wimg_to_pte(wimg_bits);
6278
6279 pmap_enter_pte(pmap, pte_p, pte, v);
6280 }
6281
6282 goto Pmap_enter_return;
6283
6284 Pmap_enter_cleanup:
6285
6286 if (refcnt != NULL) {
6287 assert(refcnt_updated);
6288 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0)
6289 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6290 }
6291
6292 Pmap_enter_return:
6293
6294 #if CONFIG_PGTRACE
6295 if (pgtrace_enabled) {
6296 // Clone and invalidate original mapping if eligible
6297 for (int i = 0; i < PAGE_RATIO; i++) {
6298 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES*i, 0, 0);
6299 }
6300 }
6301 #endif
6302
6303 if (pve_p != PV_ENTRY_NULL)
6304 pv_free(pve_p);
6305
6306 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0))
6307 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6308
6309 PMAP_UNLOCK(pmap);
6310
6311 return kr;
6312 }
6313
6314 kern_return_t
6315 pmap_enter_options(
6316 pmap_t pmap,
6317 vm_map_address_t v,
6318 ppnum_t pn,
6319 vm_prot_t prot,
6320 vm_prot_t fault_type,
6321 unsigned int flags,
6322 boolean_t wired,
6323 unsigned int options,
6324 __unused void *arg)
6325 {
6326 kern_return_t kr = KERN_FAILURE;
6327
6328 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
6329 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
6330
6331 kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
6332 pv_water_mark_check();
6333
6334 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
6335
6336 return kr;
6337 }
6338
6339 /*
6340 * Routine: pmap_change_wiring
6341 * Function: Change the wiring attribute for a map/virtual-address
6342 * pair.
6343 * In/out conditions:
6344 * The mapping must already exist in the pmap.
6345 */
6346 MARK_AS_PMAP_TEXT static void
6347 pmap_change_wiring_internal(
6348 pmap_t pmap,
6349 vm_map_address_t v,
6350 boolean_t wired)
6351 {
6352 pt_entry_t *pte_p;
6353 pmap_paddr_t pa;
6354
6355 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6356 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6357 * to be wired in nearly all cases. For VM layer functionality, the wired
6358 * count in vm_page_t is sufficient. */
6359 if (pmap == kernel_pmap) {
6360 return;
6361 }
6362 VALIDATE_USER_PMAP(pmap);
6363
6364 PMAP_LOCK(pmap);
6365 pte_p = pmap_pte(pmap, v);
6366 assert(pte_p != PT_ENTRY_NULL);
6367 pa = pte_to_pa(*pte_p);
6368 if (pa_valid(pa))
6369 LOCK_PVH((int)pa_index(pa));
6370
6371 if (wired && !pte_is_wired(*pte_p)) {
6372 pte_set_wired(pte_p, wired);
6373 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
6374 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6375 } else if (!wired && pte_is_wired(*pte_p)) {
6376 PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
6377 pte_set_wired(pte_p, wired);
6378 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
6379 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6380 }
6381
6382 if (pa_valid(pa))
6383 UNLOCK_PVH((int)pa_index(pa));
6384
6385 PMAP_UNLOCK(pmap);
6386 }
6387
6388 void
6389 pmap_change_wiring(
6390 pmap_t pmap,
6391 vm_map_address_t v,
6392 boolean_t wired)
6393 {
6394 pmap_change_wiring_internal(pmap, v, wired);
6395 }
6396
6397 MARK_AS_PMAP_TEXT static ppnum_t
6398 pmap_find_phys_internal(
6399 pmap_t pmap,
6400 addr64_t va)
6401 {
6402 ppnum_t ppn=0;
6403
6404 VALIDATE_PMAP(pmap);
6405
6406 if (pmap != kernel_pmap) {
6407 PMAP_LOCK(pmap);
6408 }
6409
6410 ppn = pmap_vtophys(pmap, va);
6411
6412 if (pmap != kernel_pmap) {
6413 PMAP_UNLOCK(pmap);
6414 }
6415
6416 return ppn;
6417 }
6418
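/*
 * Return the physical page number backing a VA, or 0 if none.  Try the
 * hardware address-translation helpers first; fall back to a software
 * table walk, bypassing pmap locks when called from the kernel debugger
 * (not_in_kdp == 0).
 */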
6419 ppnum_t
6420 pmap_find_phys(
6421 pmap_t pmap,
6422 addr64_t va)
6423 {
6424 pmap_paddr_t pa=0;
6425
6426 if (pmap == kernel_pmap)
6427 pa = mmu_kvtop(va);
6428 else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))
6429 pa = mmu_uvtop(va);
6430
6431 if (pa) return (ppnum_t)(pa >> PAGE_SHIFT);
6432
6433 if (not_in_kdp) {
6434 return pmap_find_phys_internal(pmap, va);
6435 } else {
6436 return pmap_vtophys(pmap, va);
6437 }
6438 }
6439
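/*
 * Translate a kernel virtual address to a physical address, preserving
 * the offset within the page.  Prefer the hardware translation helper
 * (mmu_kvtop); fall back to a software walk of the kernel page tables.
 */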
6440 pmap_paddr_t
6441 kvtophys(
6442 vm_offset_t va)
6443 {
6444 pmap_paddr_t pa;
6445
6446 pa = mmu_kvtop(va);
6447 if (pa) return pa;
6448 pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
6449 if (pa)
6450 pa |= (va & PAGE_MASK);
6451
6452 return ((pmap_paddr_t)pa);
6453 }
6454
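/*
 * Software page-table walk: translate a VA within the given pmap to a
 * physical page number, handling both block/section and page mappings.
 * Returns 0 if the VA is outside the pmap's range or not mapped.
 */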
6455 ppnum_t
6456 pmap_vtophys(
6457 pmap_t pmap,
6458 addr64_t va)
6459 {
6460 if ((va < pmap->min) || (va >= pmap->max)) {
6461 return 0;
6462 }
6463
6464 #if (__ARM_VMSA__ == 7)
6465 tt_entry_t *tte_p, tte;
6466 pt_entry_t *pte_p;
6467 ppnum_t ppn;
6468
6469 tte_p = pmap_tte(pmap, va);
6470 if (tte_p == (tt_entry_t *) NULL)
6471 return (ppnum_t) 0;
6472
6473 tte = *tte_p;
6474 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6475 pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
6476 ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
6477 #if DEVELOPMENT || DEBUG
6478 if (ppn != 0 &&
6479 ARM_PTE_IS_COMPRESSED(*pte_p)) {
6480 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
6481 pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
6482 }
6483 #endif /* DEVELOPMENT || DEBUG */
6484 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK)
6485 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER)
6486 ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
6487 else
6488 ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
6489 else
6490 ppn = 0;
6491 #else
6492 tt_entry_t *ttp;
6493 tt_entry_t tte;
6494 ppnum_t ppn=0;
6495
6496 /* Level 0 currently unused */
6497
6498 #if __ARM64_TWO_LEVEL_PMAP__
6499 /* We have no L1 entry; go straight to the L2 entry */
6500 ttp = pmap_tt2e(pmap, va);
6501 tte = *ttp;
6502 #else
6503 /* Get first-level (1GB) entry */
6504 ttp = pmap_tt1e(pmap, va);
6505 tte = *ttp;
6506 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID))
6507 return (ppn);
6508
6509 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, va)];
6510 #endif
6511 if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID))
6512 return (ppn);
6513
6514 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6515 ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK)| (va & ARM_TT_L2_OFFMASK));
6516 return(ppn);
6517 }
6518 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, va)];
6519 ppn = (ppnum_t) atop((tte & ARM_PTE_MASK)| (va & ARM_TT_L3_OFFMASK));
6520 #endif
6521
6522 return ppn;
6523 }
6524
6525 MARK_AS_PMAP_TEXT static vm_offset_t
6526 pmap_extract_internal(
6527 pmap_t pmap,
6528 vm_map_address_t va)
6529 {
6530 pmap_paddr_t pa=0;
6531 ppnum_t ppn=0;
6532
6533 if (pmap == NULL) {
6534 return 0;
6535 }
6536
6537 VALIDATE_PMAP(pmap);
6538
6539 PMAP_LOCK(pmap);
6540
6541 ppn = pmap_vtophys(pmap, va);
6542
6543 if (ppn != 0)
6544 pa = ptoa(ppn)| ((va) & PAGE_MASK);
6545
6546 PMAP_UNLOCK(pmap);
6547
6548 return pa;
6549 }
6550
6551 /*
6552 * Routine: pmap_extract
6553 * Function:
6554 * Extract the physical page address associated
6555 * with the given map/virtual_address pair.
6556 *
6557 */
6558 vm_offset_t
6559 pmap_extract(
6560 pmap_t pmap,
6561 vm_map_address_t va)
6562 {
6563 pmap_paddr_t pa=0;
6564
6565 if (pmap == kernel_pmap)
6566 pa = mmu_kvtop(va);
6567 else if (pmap == vm_map_pmap(current_thread()->map))
6568 pa = mmu_uvtop(va);
6569
6570 if (pa) return pa;
6571
6572 return pmap_extract_internal(pmap, va);
6573 }
6574
6575 /*
6576 * pmap_init_pte_page - Initialize a page table page.
6577 */
6578 void
6579 pmap_init_pte_page(
6580 pmap_t pmap,
6581 pt_entry_t *pte_p,
6582 vm_offset_t va,
6583 unsigned int ttlevel,
6584 boolean_t alloc_ptd)
6585 {
6586 pt_desc_t *ptdp = NULL;
6587 vm_offset_t *pvh;
6588
6589 pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));
6590
6591 if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
6592 if (alloc_ptd) {
6593 /*
6594 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
6595 * on 4KB hardware, we may already have allocated a page table descriptor for a
6596 * bootstrap request, so we check for an existing PTD here.
6597 */
6598 ptdp = ptd_alloc(pmap);
6599 pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
6600 } else {
6601 panic("pmap_init_pte_page(): pte_p %p", pte_p);
6602 }
6603 } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
6604 ptdp = (pt_desc_t*)(pvh_list(pvh));
6605 } else {
6606 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
6607 }
6608
6609 bzero(pte_p, ARM_PGBYTES);
6610 // The barrier below ensures the page zeroing is visible to the page
6611 // table walker before this table is linked into the previous level.
6612 __builtin_arm_dmb(DMB_ISHST);
6613 ptd_init(ptdp, pmap, va, ttlevel, pte_p);
6614 }
6615
6616 /*
6617 * Routine: pmap_expand
6618 *
6619 * Expands a pmap to be able to map the specified virtual address.
6620 *
6621 * Allocates new memory for the default (COARSE) translation table
6622 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
6623 * also allocates space for the corresponding pv entries.
6624 *
6625 * Nothing should be locked.
6626 */
6627 static kern_return_t
6628 pmap_expand(
6629 pmap_t pmap,
6630 vm_map_address_t v,
6631 unsigned int options,
6632 unsigned int level)
6633 {
6634 #if (__ARM_VMSA__ == 7)
6635 vm_offset_t pa;
6636 tt_entry_t *tte_p;
6637 tt_entry_t *tt_p;
6638 unsigned int i;
6639
6640
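/* ARMv7 user pmaps start with a 1-page TT1 of NTTES entries.  If the
 * target VA indexes beyond that, grow to a 2-page TT1: allocate the
 * larger table, copy the live entries, publish it, and park the old
 * table in prev_tte so it can be freed once no CPU is still using it
 * (see pmap_switch_user_ttb_internal). */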
6641 while (tte_index(pmap, v) >= pmap->tte_index_max) {
6642 tte_p = pmap_tt1_allocate(pmap, 2*ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
6643 if (tte_p == (tt_entry_t *)0)
6644 return KERN_RESOURCE_SHORTAGE;
6645
6646 PMAP_LOCK(pmap);
6647 if (pmap->tte_index_max > NTTES) {
6648 pmap_tt1_deallocate(pmap, tte_p, 2*ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
6649 PMAP_UNLOCK(pmap);
6650 break;
6651 }
6652
6653 pmap_simple_lock(&pmap->tt1_lock);
6654 for (i = 0; i < pmap->tte_index_max; i++)
6655 tte_p[i] = pmap->tte[i];
6656 for (i = NTTES; i < 2*NTTES; i++)
6657 tte_p[i] = ARM_TTE_TYPE_FAULT;
6658
6659 pmap->prev_tte = pmap->tte;
6660 pmap->tte = tte_p;
6661 pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);
6662
6663 FLUSH_PTE_RANGE(pmap->tte, pmap->tte + (2*NTTES));
6664
6665 pmap->tte_index_max = 2*NTTES;
6666 pmap->stamp = hw_atomic_add(&pmap_stamp, 1);
6667
6668 for (i = 0; i < NTTES; i++)
6669 pmap->prev_tte[i] = ARM_TTE_TYPE_FAULT;
6670
6671 FLUSH_PTE_RANGE(pmap->prev_tte, pmap->prev_tte + NTTES);
6672
6673 pmap_simple_unlock(&pmap->tt1_lock);
6674 PMAP_UNLOCK(pmap);
6675 pmap_set_pmap(pmap, current_thread());
6676
6677 }
6678
6679 if (level == 1)
6680 return (KERN_SUCCESS);
6681
6682 {
6683 tt_entry_t *tte_next_p;
6684
6685 PMAP_LOCK(pmap);
6686 pa = 0;
6687 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
6688 PMAP_UNLOCK(pmap);
6689 return (KERN_SUCCESS);
6690 }
6691 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
6692 for (i = 0, tte_next_p = tte_p; i<4; i++) {
6693 if (tte_to_pa(*tte_next_p)) {
6694 pa = tte_to_pa(*tte_next_p);
6695 break;
6696 }
6697 tte_next_p++;
6698 }
6699 pa = pa & ~PAGE_MASK;
6700 if (pa) {
6701 tte_p = &pmap->tte[ttenum(v)];
6702 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
6703 FLUSH_PTE(tte_p);
6704 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
6705 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
6706 PMAP_UNLOCK(pmap);
6707 return (KERN_SUCCESS);
6708 }
6709 PMAP_UNLOCK(pmap);
6710 }
6711 v = v & ~ARM_TT_L1_PT_OFFMASK;
6712
6713
6714 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6715 /*
6716 * Allocate a VM page for the level 2 page table entries.
6717 */
6718 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6719 if(options & PMAP_OPTIONS_NOWAIT) {
6720 return KERN_RESOURCE_SHORTAGE;
6721 }
6722 VM_PAGE_WAIT();
6723 }
6724
6725 PMAP_LOCK(pmap);
6726 /*
6727 * See if someone else expanded us first
6728 */
6729 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6730 tt_entry_t *tte_next_p;
6731
6732 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
6733 pa = kvtophys((vm_offset_t)tt_p);
6734 #ifndef __ARM_L1_PTW__
6735 CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
6736 #endif
6737 tte_p = &pmap->tte[ttenum(v)];
6738 for (i = 0, tte_next_p = tte_p; i<4; i++) {
6739 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
6740 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
6741 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
6742 tte_next_p++;
6743 pa = pa +0x400;
6744 }
6745 FLUSH_PTE_RANGE(tte_p, tte_p + 4);
6746
6747 pa = 0x0ULL;
6748 tt_p = (tt_entry_t *)NULL;
6749 }
6750 PMAP_UNLOCK(pmap);
6751 if (tt_p != (tt_entry_t *)NULL) {
6752 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
6753 tt_p = (tt_entry_t *)NULL;
6754 }
6755 }
6756 return (KERN_SUCCESS);
6757 #else
6758 pmap_paddr_t pa;
6759 #if __ARM64_TWO_LEVEL_PMAP__
6760 /* If we are using a two level page table, we'll start at L2. */
6761 unsigned int ttlevel = 2;
6762 #else
6763 /* Otherwise, we start at L1 (we use 3 levels by default). */
6764 unsigned int ttlevel = 1;
6765 #endif
6766 tt_entry_t *tte_p;
6767 tt_entry_t *tt_p;
6768
6769 pa = 0x0ULL;
6770 tt_p = (tt_entry_t *)NULL;
6771
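/* Walk down from the top level (L1, or L2 on two-level configurations),
 * allocating and linking a next-level translation table wherever one is
 * missing, until the level that maps the requested VA exists. */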
6772 for (; ttlevel < level; ttlevel++) {
6773
6774 PMAP_LOCK(pmap);
6775
6776 if (ttlevel == 1) {
6777 if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
6778 PMAP_UNLOCK(pmap);
6779 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6780 if(options & PMAP_OPTIONS_NOWAIT) {
6781 return KERN_RESOURCE_SHORTAGE;
6782 }
6783 VM_PAGE_WAIT();
6784 }
6785 PMAP_LOCK(pmap);
6786 if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
6787 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
6788 pa = kvtophys((vm_offset_t)tt_p);
6789 tte_p = pmap_tt1e( pmap, v);
6790 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
6791 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
6792 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
6793 pa = 0x0ULL;
6794 tt_p = (tt_entry_t *)NULL;
6795 if ((pmap == kernel_pmap) && (VM_MIN_KERNEL_ADDRESS < 0x00000000FFFFFFFFULL))
6796 current_pmap()->tte[v>>ARM_TT_L1_SHIFT] = kernel_pmap->tte[v>>ARM_TT_L1_SHIFT];
6797 }
6798
6799 }
6800 } else if (ttlevel == 2) {
6801 if (pmap_tt3e(pmap, v) == PT_ENTRY_NULL) {
6802 PMAP_UNLOCK(pmap);
6803 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L3_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6804 if(options & PMAP_OPTIONS_NOWAIT) {
6805 return KERN_RESOURCE_SHORTAGE;
6806 }
6807 VM_PAGE_WAIT();
6808 }
6809 PMAP_LOCK(pmap);
6810 if ((pmap_tt3e(pmap, v) == PT_ENTRY_NULL)) {
6811 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v , PMAP_TT_L3_LEVEL, FALSE);
6812 pa = kvtophys((vm_offset_t)tt_p);
6813 tte_p = pmap_tt2e( pmap, v);
6814 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
6815 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L2_OFFMASK),
6816 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L2_OFFMASK) + ARM_TT_L2_SIZE), *tte_p);
6817 pa = 0x0ULL;
6818 tt_p = (tt_entry_t *)NULL;
6819 }
6820 }
6821 }
6822
6823 PMAP_UNLOCK(pmap);
6824
6825 if (tt_p != (tt_entry_t *)NULL) {
6826 pmap_tt_deallocate(pmap, tt_p, ttlevel+1);
6827 tt_p = (tt_entry_t *)NULL;
6828 }
6829 }
6830
6831 return (KERN_SUCCESS);
6832 #endif
6833 }
6834
6835 /*
6836 * Routine: pmap_collect
6837 * Function:
6838 * Garbage collects the physical map system for
6839 * pages which are no longer used.
6840 * Success need not be guaranteed -- that is, there
6841 * may well be pages which are not referenced, but
6842 * others may be collected.
6843 */
6844 void
6845 pmap_collect(pmap_t pmap)
6846 {
6847 if (pmap == PMAP_NULL)
6848 return;
6849
6850 #if 0
6851 PMAP_LOCK(pmap);
6852 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
6853 /* TODO: Scan for vm page assigned to top level page tables with no reference */
6854 }
6855 PMAP_UNLOCK(pmap);
6856 #endif
6857
6858 return;
6859 }
6860
6861 /*
6862 * Routine: pmap_gc
6863 * Function:
6864 * Pmap garbage collection
6865 * Called by the pageout daemon when pages are scarce.
6866 *
6867 */
6868 void
6869 pmap_gc(
6870 void)
6871 {
6872 pmap_t pmap, pmap_next;
6873 boolean_t gc_wait;
6874
6875 if (pmap_gc_allowed &&
6876 (pmap_gc_allowed_by_time_throttle ||
6877 pmap_gc_forced)) {
6878 pmap_gc_forced = FALSE;
6879 pmap_gc_allowed_by_time_throttle = FALSE;
6880 pmap_simple_lock(&pmaps_lock);
6881 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
6882 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
6883 if (!(pmap->gc_status & PMAP_GC_INFLIGHT))
6884 pmap->gc_status |= PMAP_GC_INFLIGHT;
6885 pmap_simple_unlock(&pmaps_lock);
6886
6887 pmap_collect(pmap);
6888
6889 pmap_simple_lock(&pmaps_lock);
6890 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
6891 pmap->gc_status &= ~(PMAP_GC_INFLIGHT|PMAP_GC_WAIT);
6892 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
6893 if (gc_wait) {
6894 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next))
6895 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
6896 pmap_simple_unlock(&pmaps_lock);
6897 thread_wakeup((event_t) & pmap->gc_status);
6898 pmap_simple_lock(&pmaps_lock);
6899 }
6900 pmap = pmap_next;
6901 }
6902 pmap_simple_unlock(&pmaps_lock);
6903 }
6904 }
6905
6906 /*
6907 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
6908 */
6909 uint64_t
6910 pmap_release_pages_fast(void)
6911 {
6912 return 0;
6913 }
6914
6915 /*
6916 * By default, don't attempt pmap GC more frequently
6917 * than once per minute.
6918 */
6919
6920 void
6921 compute_pmap_gc_throttle(
6922 void *arg __unused)
6923 {
6924 pmap_gc_allowed_by_time_throttle = TRUE;
6925 }
6926
6927 /*
6928 * pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, ...)
6929 *
6930 * Invalidates all of the instruction cache on a physical page and
6931 * pushes any dirty data from the data cache for the same physical page
6932 */
6933
6934 kern_return_t
6935 pmap_attribute_cache_sync(
6936 ppnum_t pp,
6937 vm_size_t size,
6938 __unused vm_machine_attribute_t attribute,
6939 __unused vm_machine_attribute_val_t * value)
6940 {
6941 if (size > PAGE_SIZE) {
6942 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
6943 } else
6944 cache_sync_page(pp);
6945
6946 return KERN_SUCCESS;
6947 }
6948
6949 /*
6950 * pmap_sync_page_data_phys(ppnum_t pp)
6951 *
6952 * Invalidates all of the instruction cache on a physical page and
6953 * pushes any dirty data from the data cache for the same physical page
6954 */
6955 void
6956 pmap_sync_page_data_phys(
6957 ppnum_t pp)
6958 {
6959 cache_sync_page(pp);
6960 }
6961
6962 /*
6963 * pmap_sync_page_attributes_phys(ppnum_t pp)
6964 *
6965 * Write back and invalidate all cachelines on a physical page.
6966 */
6967 void
6968 pmap_sync_page_attributes_phys(
6969 ppnum_t pp)
6970 {
6971 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
6972 }
6973
6974 #if CONFIG_COREDUMP
6975 /* temporary workaround */
6976 boolean_t
6977 coredumpok(
6978 vm_map_t map,
6979 vm_offset_t va)
6980 {
6981 pt_entry_t *pte_p;
6982 pt_entry_t spte;
6983
6984 pte_p = pmap_pte(map->pmap, va);
6985 if (0 == pte_p)
6986 return FALSE;
6987 spte = *pte_p;
6988 return ((spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT));
6989 }
6990 #endif
6991
6992 void
6993 fillPage(
6994 ppnum_t pn,
6995 unsigned int fill)
6996 {
6997 unsigned int *addr;
6998 int count;
6999
7000 addr = (unsigned int *) phystokv(ptoa(pn));
7001 count = PAGE_SIZE / sizeof(unsigned int);
7002 while (count--)
7003 *addr++ = fill;
7004 }
7005
7006 extern void mapping_set_mod(ppnum_t pn);
7007
7008 void
7009 mapping_set_mod(
7010 ppnum_t pn)
7011 {
7012 pmap_set_modify(pn);
7013 }
7014
7015 extern void mapping_set_ref(ppnum_t pn);
7016
7017 void
7018 mapping_set_ref(
7019 ppnum_t pn)
7020 {
7021 pmap_set_reference(pn);
7022 }
7023
7024 /*
7025 * Clear specified attribute bits.
7026 *
7027 * Try to force an arm_fast_fault() for all mappings of
7028 * the page - to force attributes to be set again at fault time.
7029 * If the forcing succeeds, clear the cached bits at the head.
7030 * Otherwise, something must have been wired, so leave the cached
7031 * attributes alone.
7032 */
7033 MARK_AS_PMAP_TEXT static void
7034 phys_attribute_clear_internal(
7035 ppnum_t pn,
7036 unsigned int bits,
7037 int options,
7038 void *arg)
7039 {
7040 pmap_paddr_t pa = ptoa(pn);
7041 vm_prot_t allow_mode = VM_PROT_ALL;
7042
7043
7044 if ((bits & PP_ATTR_MODIFIED) &&
7045 (options & PMAP_OPTIONS_NOFLUSH) &&
7046 (arg == NULL)) {
7047 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
7048 "should not clear 'modified' without flushing TLBs\n",
7049 pn, bits, options, arg);
7050 }
7051
7052 assert(pn != vm_page_fictitious_addr);
7053
7054 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
7055 assert(bits == PP_ATTR_MODIFIED);
7056
7057 pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
7058 /*
7059 * We short circuit this case; it should not need to
7060 * invoke arm_force_fast_fault, so just clear the modified bit.
7061 * pmap_page_protect has taken care of resetting
7062 * the state so that we'll see the next write as a fault to
7063 * the VM (i.e. we don't want a fast fault).
7064 */
7065 pa_clear_bits(pa, bits);
7066 return;
7067 }
7068 if (bits & PP_ATTR_REFERENCED)
7069 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
7070 if (bits & PP_ATTR_MODIFIED)
7071 allow_mode &= ~VM_PROT_WRITE;
7072
7073 if (bits == PP_ATTR_NOENCRYPT) {
7074 /*
7075 * We short circuit this case; it should not need to
7076 * invoke arm_force_fast_fault, so just clear and
7077 * return. On ARM, this bit is just a debugging aid.
7078 */
7079 pa_clear_bits(pa, bits);
7080 return;
7081 }
7082
7083 if (arm_force_fast_fault_internal(pn, allow_mode, options))
7084 pa_clear_bits(pa, bits);
7085 return;
7086 }
7087
7088 static void
7089 phys_attribute_clear(
7090 ppnum_t pn,
7091 unsigned int bits,
7092 int options,
7093 void *arg)
7094 {
7095 /*
7096 * Do we really want this tracepoint? It will be extremely chatty.
7097 * Also, should we have a corresponding trace point for the set path?
7098 */
7099 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
7100
7101 phys_attribute_clear_internal(pn, bits, options, arg);
7102
7103 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
7104 }
7105
7106 /*
7107 * Set specified attribute bits.
7108 *
7109 * Set cached value in the pv head because we have
7110 * no per-mapping hardware support for referenced and
7111 * modify bits.
7112 */
7113 MARK_AS_PMAP_TEXT static void
7114 phys_attribute_set_internal(
7115 ppnum_t pn,
7116 unsigned int bits)
7117 {
7118 pmap_paddr_t pa = ptoa(pn);
7119 assert(pn != vm_page_fictitious_addr);
7120
7121
7122 pa_set_bits(pa, bits);
7123
7124 return;
7125 }
7126
7127 static void
7128 phys_attribute_set(
7129 ppnum_t pn,
7130 unsigned int bits)
7131 {
7132 phys_attribute_set_internal(pn, bits);
7133 }
7134
7135
7136 /*
7137 * Check specified attribute bits.
7138 *
7139 * use the software cached bits (since no hw support).
7140 */
7141 static boolean_t
7142 phys_attribute_test(
7143 ppnum_t pn,
7144 unsigned int bits)
7145 {
7146 pmap_paddr_t pa = ptoa(pn);
7147 assert(pn != vm_page_fictitious_addr);
7148 return pa_test_bits(pa, bits);
7149 }
7150
7151
7152 /*
7153 * Set the modify/reference bits on the specified physical page.
7154 */
7155 void
7156 pmap_set_modify(ppnum_t pn)
7157 {
7158 phys_attribute_set(pn, PP_ATTR_MODIFIED);
7159 }
7160
7161
7162 /*
7163 * Clear the modify bits on the specified physical page.
7164 */
7165 void
7166 pmap_clear_modify(
7167 ppnum_t pn)
7168 {
7169 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
7170 }
7171
7172
7173 /*
7174 * pmap_is_modified:
7175 *
7176 * Return whether or not the specified physical page is modified
7177 * by any physical maps.
7178 */
7179 boolean_t
7180 pmap_is_modified(
7181 ppnum_t pn)
7182 {
7183 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
7184 }
7185
7186
7187 /*
7188 * Set the reference bit on the specified physical page.
7189 */
7190 static void
7191 pmap_set_reference(
7192 ppnum_t pn)
7193 {
7194 phys_attribute_set(pn, PP_ATTR_REFERENCED);
7195 }
7196
7197 /*
7198 * Clear the reference bits on the specified physical page.
7199 */
7200 void
7201 pmap_clear_reference(
7202 ppnum_t pn)
7203 {
7204 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
7205 }
7206
7207
7208 /*
7209 * pmap_is_referenced:
7210 *
7211 * Return whether or not the specified physical page is referenced
7212 * by any physical maps.
7213 */
7214 boolean_t
7215 pmap_is_referenced(
7216 ppnum_t pn)
7217 {
7218 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
7219 }
7220
7221 /*
7222 * pmap_get_refmod(phys)
7223 * returns the referenced and modified bits of the specified
7224 * physical page.
7225 */
7226 unsigned int
7227 pmap_get_refmod(
7228 ppnum_t pn)
7229 {
7230 return (((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
7231 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0));
7232 }
7233
7234 /*
7235 * pmap_clear_refmod(phys, mask)
7236 * clears the referenced and modified bits as specified by the mask
7237 * of the specified physical page.
7238 */
7239 void
7240 pmap_clear_refmod_options(
7241 ppnum_t pn,
7242 unsigned int mask,
7243 unsigned int options,
7244 void *arg)
7245 {
7246 unsigned int bits;
7247
7248 bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
7249 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
7250 phys_attribute_clear(pn, bits, options, arg);
7251 }
7252
7253 void
7254 pmap_clear_refmod(
7255 ppnum_t pn,
7256 unsigned int mask)
7257 {
7258 pmap_clear_refmod_options(pn, mask, 0, NULL);
7259 }
7260
7261 unsigned int
7262 pmap_disconnect_options(
7263 ppnum_t pn,
7264 unsigned int options,
7265 void *arg)
7266 {
7267 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
7268 /*
7269 * On ARM, the "modified" bit is managed by software, so
7270 * we know up-front if the physical page is "modified",
7271 * without having to scan all the PTEs pointing to it.
7272 * The caller should have made the VM page "busy" so no one
7273 * should be able to establish any new mapping and "modify"
7274 * the page behind us.
7275 */
7276 if (pmap_is_modified(pn)) {
7277 /*
7278 * The page has been modified and will be sent to
7279 * the VM compressor.
7280 */
7281 options |= PMAP_OPTIONS_COMPRESSOR;
7282 } else {
7283 /*
7284 * The page hasn't been modified and will be freed
7285 * instead of compressed.
7286 */
7287 }
7288 }
7289
7290 /* disconnect the page */
7291 pmap_page_protect_options(pn, 0, options, arg);
7292
7293 /* return ref/chg status */
7294 return (pmap_get_refmod(pn));
7295 }
7296
7297 /*
7298 * Routine:
7299 * pmap_disconnect
7300 *
7301 * Function:
7302 * Disconnect all mappings for this page and return reference and change status
7303 * in generic format.
7304 *
7305 */
7306 unsigned int
7307 pmap_disconnect(
7308 ppnum_t pn)
7309 {
7310 pmap_page_protect(pn, 0); /* disconnect the page */
7311 return (pmap_get_refmod(pn)); /* return ref/chg status */
7312 }
7313
7314 boolean_t
7315 pmap_has_managed_page(ppnum_t first, ppnum_t last)
7316 {
7317 if (ptoa(first) >= vm_last_phys) return (FALSE);
7318 if (ptoa(last) < vm_first_phys) return (FALSE);
7319
7320 return (TRUE);
7321 }
7322
7323 /*
7324 * The state maintained by the noencrypt functions is used as a
7325 * debugging aid on ARM. This incurs some overhead on the part
7326 * of the caller. A special case check in phys_attribute_clear
7327 * (the most expensive path) currently minimizes this overhead,
7328 * but stubbing these functions out on RELEASE kernels yields
7329 * further wins.
7330 */
7331 boolean_t
7332 pmap_is_noencrypt(
7333 ppnum_t pn)
7334 {
7335 #if DEVELOPMENT || DEBUG
7336 boolean_t result = FALSE;
7337
7338 if (!pa_valid(ptoa(pn))) return FALSE;
7339
7340 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
7341
7342 return result;
7343 #else
7344 #pragma unused(pn)
7345 return FALSE;
7346 #endif
7347 }
7348
7349 void
7350 pmap_set_noencrypt(
7351 ppnum_t pn)
7352 {
7353 #if DEVELOPMENT || DEBUG
7354 if (!pa_valid(ptoa(pn))) return;
7355
7356 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
7357 #else
7358 #pragma unused(pn)
7359 #endif
7360 }
7361
7362 void
7363 pmap_clear_noencrypt(
7364 ppnum_t pn)
7365 {
7366 #if DEVELOPMENT || DEBUG
7367 if (!pa_valid(ptoa(pn))) return;
7368
7369 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
7370 #else
7371 #pragma unused(pn)
7372 #endif
7373 }
7374
7375
7376 void
7377 pmap_lock_phys_page(ppnum_t pn)
7378 {
7379 int pai;
7380 pmap_paddr_t phys = ptoa(pn);
7381
7382 if (pa_valid(phys)) {
7383 pai = (int)pa_index(phys);
7384 LOCK_PVH(pai);
7385 } else
7386 simple_lock(&phys_backup_lock);
7387 }
7388
7389
7390 void
7391 pmap_unlock_phys_page(ppnum_t pn)
7392 {
7393 int pai;
7394 pmap_paddr_t phys = ptoa(pn);
7395
7396 if (pa_valid(phys)) {
7397 pai = (int)pa_index(phys);
7398 UNLOCK_PVH(pai);
7399 } else
7400 simple_unlock(&phys_backup_lock);
7401 }
7402
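/*
 * Install the given pmap as the current user address space on this CPU.
 * ARMv7: maintain the per-pmap cpu_ref, free a stale prev_tte left by
 * pmap_expand(), program TTBCR.N for the 1GB/2GB user split, then
 * (unless __ARM_USER_PROTECT__ is set) load TTBR0 and the context ID.
 * ARMv8: load TTBR0 with the pmap's table base and ASID, or the invalid
 * table for the kernel pmap.
 */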
7403 MARK_AS_PMAP_TEXT static void
7404 pmap_switch_user_ttb_internal(
7405 pmap_t pmap)
7406 {
7407 VALIDATE_PMAP(pmap);
7408 pmap_cpu_data_t *cpu_data_ptr;
7409 cpu_data_ptr = pmap_get_cpu_data();
7410
7411 #if (__ARM_VMSA__ == 7)
7412
7413 if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL)
7414 && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) {
7415 unsigned int c;
7416
7417 c = hw_atomic_sub((volatile uint32_t *)&cpu_data_ptr->cpu_user_pmap->cpu_ref, 1);
7418 if ((c == 0) && (cpu_data_ptr->cpu_user_pmap->prev_tte != 0)) {
7419 /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
7420 * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/
7421 tt_entry_t *tt_entry;
7422
7423 tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte;
7424 cpu_data_ptr->cpu_user_pmap->prev_tte = (tt_entry_t *) NULL;
7425 pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
7426 }
7427 }
7428 cpu_data_ptr->cpu_user_pmap = pmap;
7429 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
7430 (void) hw_atomic_add((volatile uint32_t *)&pmap->cpu_ref, 1);
7431
7432 #if MACH_ASSERT && __ARM_USER_PROTECT__
7433 {
7434 unsigned int ttbr0_val, ttbr1_val;
7435 __asm__ volatile("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
7436 __asm__ volatile("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
7437 if (ttbr0_val != ttbr1_val) {
7438 panic("Misaligned ttbr0 %08X\n", ttbr0_val);
7439 }
7440 }
7441 #endif
7442 if (pmap->tte_index_max == NTTES) {
7443 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
7444 __asm__ volatile("mcr p15,0,%0,c2,c0,2" : : "r"(2));
7445 __builtin_arm_isb(ISB_SY);
7446 #if !__ARM_USER_PROTECT__
7447 set_mmu_ttb(pmap->ttep);
7448 #endif
7449 } else {
7450 #if !__ARM_USER_PROTECT__
7451 set_mmu_ttb(pmap->ttep);
7452 #endif
7453 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
7454 __asm__ volatile("mcr p15,0,%0,c2,c0,2" : : "r"(1));
7455 __builtin_arm_isb(ISB_SY);
7456 #if MACH_ASSERT && __ARM_USER_PROTECT__
7457 if (pmap->ttep & 0x1000) {
7458 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
7459 }
7460 #endif
7461 }
7462
7463 #if !__ARM_USER_PROTECT__
7464 set_context_id(pmap->asid);
7465 #endif
7466
7467 #else /* (__ARM_VMSA__ == 7) */
7468
7469 if (pmap != kernel_pmap)
7470 cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
7471
7472 if (pmap == kernel_pmap) {
7473 pmap_clear_user_ttb_internal();
7474 } else {
7475 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK)|(((uint64_t)pmap->asid) << TTBR_ASID_SHIFT));
7476 }
7477 #endif
7478 }
7479
7480 void
7481 pmap_switch_user_ttb(
7482 pmap_t pmap)
7483 {
7484 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
7485 pmap_switch_user_ttb_internal(pmap);
7486 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
7487 }
7488
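/* Point TTBR0 at a table with no user mappings: the invalid table on
 * ARMv8, or the kernel pmap's table on ARMv7. */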
7489 MARK_AS_PMAP_TEXT static void
7490 pmap_clear_user_ttb_internal(void)
7491 {
7492 #if (__ARM_VMSA__ > 7)
7493 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
7494 #else
7495 set_mmu_ttb(kernel_pmap->ttep);
7496 #endif
7497 }
7498
7499 void
7500 pmap_clear_user_ttb(void)
7501 {
7502 pmap_clear_user_ttb_internal();
7503 }
7504
7505 /*
7506 * Routine: arm_force_fast_fault
7507 *
7508 * Function:
7509 * Force all mappings for this page to fault according
7510 * to the access modes allowed, so we can gather ref/modify
7511 * bits again.
7512 */
7513 MARK_AS_PMAP_TEXT static boolean_t
7514 arm_force_fast_fault_internal(
7515 ppnum_t ppnum,
7516 vm_prot_t allow_mode,
7517 int options)
7518 {
7519 pmap_paddr_t phys = ptoa(ppnum);
7520 pv_entry_t *pve_p;
7521 pt_entry_t *pte_p;
7522 int pai;
7523 boolean_t result;
7524 pv_entry_t **pv_h;
7525 boolean_t is_reusable, is_internal;
7526 boolean_t tlb_flush_needed = FALSE;
7527 boolean_t ref_fault;
7528 boolean_t mod_fault;
7529
7530 assert(ppnum != vm_page_fictitious_addr);
7531
7532 if (!pa_valid(phys)) {
7533 return FALSE; /* Not a managed page. */
7534 }
7535
7536 result = TRUE;
7537 ref_fault = FALSE;
7538 mod_fault = FALSE;
7539 pai = (int)pa_index(phys);
7540 LOCK_PVH(pai);
7541 pv_h = pai_to_pvh(pai);
7542
7543 pte_p = PT_ENTRY_NULL;
7544 pve_p = PV_ENTRY_NULL;
7545 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7546 pte_p = pvh_ptep(pv_h);
7547 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7548 pve_p = pvh_list(pv_h);
7549 }
7550
7551 is_reusable = IS_REUSABLE_PAGE(pai);
7552 is_internal = IS_INTERNAL_PAGE(pai);
7553
7554 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7555 vm_map_address_t va;
7556 pt_entry_t spte;
7557 pt_entry_t tmplate;
7558 pmap_t pmap;
7559 boolean_t update_pte;
7560
7561 if (pve_p != PV_ENTRY_NULL)
7562 pte_p = pve_get_ptep(pve_p);
7563
7564 if (pte_p == PT_ENTRY_NULL) {
7565 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
7566 }
7567 #ifdef PVH_FLAG_IOMMU
7568 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
7569 goto fff_skip_pve;
7570 }
7571 #endif
7572 if (*pte_p == ARM_PTE_EMPTY) {
7573 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7574 }
7575 if (ARM_PTE_IS_COMPRESSED(*pte_p)) {
7576 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7577 }
7578
7579 pmap = ptep_get_pmap(pte_p);
7580 va = ptep_get_va(pte_p);
7581
7582 assert(va >= pmap->min && va < pmap->max);
7583
7584 if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
7585 result = FALSE;
7586 break;
7587 }
7588
7589 spte = *pte_p;
7590 tmplate = spte;
7591 update_pte = FALSE;
7592
7593 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
7594 /* read protection sets the pte to fault */
7595 tmplate = tmplate & ~ARM_PTE_AF;
7596 update_pte = TRUE;
7597 ref_fault = TRUE;
7598 }
7599 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
7600 /* take away write permission if set */
7601 if (pmap == kernel_pmap) {
7602 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
7603 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7604 }
7605 } else {
7606 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
7607 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
7608 }
7609 }
7610
7611 pte_set_ffr(tmplate, 1);
7612 update_pte = TRUE;
7613 mod_fault = TRUE;
7614 }
7615
7616
7617 if (update_pte) {
7618 if (*pte_p != ARM_PTE_TYPE_FAULT &&
7619 !ARM_PTE_IS_COMPRESSED(*pte_p)) {
7620 WRITE_PTE_STRONG(pte_p, tmplate);
7621 flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
7622 tlb_flush_needed = TRUE;
7623 } else {
7624 WRITE_PTE(pte_p, tmplate);
7625 __builtin_arm_isb(ISB_SY);
7626 }
7627 }
7628
7629 /* update pmap stats and ledgers */
7630 if (IS_ALTACCT_PAGE(pai, pve_p)) {
7631 /*
7632 * We do not track "reusable" status for
7633 * "alternate accounting" mappings.
7634 */
7635 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
7636 is_reusable &&
7637 is_internal &&
7638 pmap != kernel_pmap) {
7639 /* one less "reusable" */
7640 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
7641 OSAddAtomic(-1, &pmap->stats.reusable);
7642 /* one more "internal" */
7643 OSAddAtomic(+1, &pmap->stats.internal);
7644 PMAP_STATS_PEAK(pmap->stats.internal);
7645 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
7646 pmap_ledger_credit(pmap, task_ledgers.internal, machine_ptob(1));
7647 assert(!IS_ALTACCT_PAGE(pai, pve_p));
7648 assert(IS_INTERNAL_PAGE(pai));
7649 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
7650
7651 /*
7652 * Avoid the cost of another trap to handle the fast
7653 * fault when we next write to this page: let's just
7654 * handle that now since we already have all the
7655 * necessary information.
7656 */
7657 {
7658 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
7659 }
7660 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
7661 !is_reusable &&
7662 is_internal &&
7663 pmap != kernel_pmap) {
7664 /* one more "reusable" */
7665 OSAddAtomic(+1, &pmap->stats.reusable);
7666 PMAP_STATS_PEAK(pmap->stats.reusable);
7667 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
7668 /* one less "internal" */
7669 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
7670 OSAddAtomic(-1, &pmap->stats.internal);
7671 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(1));
7672 assert(!IS_ALTACCT_PAGE(pai, pve_p));
7673 assert(IS_INTERNAL_PAGE(pai));
7674 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
7675 }
7676
7677 #ifdef PVH_FLAG_IOMMU
7678 fff_skip_pve:
7679 #endif
7680 pte_p = PT_ENTRY_NULL;
7681 if (pve_p != PV_ENTRY_NULL)
7682 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7683 }
7684
7685 if (tlb_flush_needed)
7686 sync_tlb_flush();
7687
7688 /* update global "reusable" status for this page */
7689 if (is_internal) {
7690 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
7691 is_reusable) {
7692 CLR_REUSABLE_PAGE(pai);
7693 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
7694 !is_reusable) {
7695 SET_REUSABLE_PAGE(pai);
7696 }
7697 }
7698
7699 if (mod_fault) {
7700 SET_MODFAULT_PAGE(pai);
7701 }
7702 if (ref_fault) {
7703 SET_REFFAULT_PAGE(pai);
7704 }
7705
7706 UNLOCK_PVH(pai);
7707 return result;
7708 }
7709
7710 boolean_t
7711 arm_force_fast_fault(
7712 ppnum_t ppnum,
7713 vm_prot_t allow_mode,
7714 int options,
7715 __unused void *arg)
7716 {
7717 pmap_paddr_t phys = ptoa(ppnum);
7718
7719 assert(ppnum != vm_page_fictitious_addr);
7720
7721 if (!pa_valid(phys)) {
7722 return FALSE; /* Not a managed page. */
7723 }
7724
7725 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
7726 }
7727
7728 /*
7729 * Routine: arm_clear_fast_fault
7730 *
7731 * Function:
7732 * Clear pending force fault for all mappings for this page based on
7733 * the observed fault type, update ref/modify bits.
7734 */
7735 boolean_t
7736 arm_clear_fast_fault(
7737 ppnum_t ppnum,
7738 vm_prot_t fault_type)
7739 {
7740 pmap_paddr_t pa = ptoa(ppnum);
7741 pv_entry_t *pve_p;
7742 pt_entry_t *pte_p;
7743 int pai;
7744 boolean_t result;
7745 boolean_t tlb_flush_needed = FALSE;
7746 pv_entry_t **pv_h;
7747
7748 assert(ppnum != vm_page_fictitious_addr);
7749
7750 if (!pa_valid(pa)) {
7751 return FALSE; /* Not a managed page. */
7752 }
7753
7754 result = FALSE;
7755 pai = (int)pa_index(pa);
7756 ASSERT_PVH_LOCKED(pai);
7757 pv_h = pai_to_pvh(pai);
7758
7759 pte_p = PT_ENTRY_NULL;
7760 pve_p = PV_ENTRY_NULL;
7761 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7762 pte_p = pvh_ptep(pv_h);
7763 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7764 pve_p = pvh_list(pv_h);
7765 }
7766
7767 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7768 vm_map_address_t va;
7769 pt_entry_t spte;
7770 pt_entry_t tmplate;
7771 pmap_t pmap;
7772
7773 if (pve_p != PV_ENTRY_NULL)
7774 pte_p = pve_get_ptep(pve_p);
7775
7776 if (pte_p == PT_ENTRY_NULL) {
7777 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
7778 }
7779 #ifdef PVH_FLAG_IOMMU
7780 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
7781 goto cff_skip_pve;
7782 }
7783 #endif
7784 if (*pte_p == ARM_PTE_EMPTY) {
7785 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7786 }
7787
7788 pmap = ptep_get_pmap(pte_p);
7789 va = ptep_get_va(pte_p);
7790
7791 assert(va >= pmap->min && va < pmap->max);
7792
7793 spte = *pte_p;
7794 tmplate = spte;
7795
7796 if ((fault_type & VM_PROT_WRITE) && (pte_is_ffr(spte))) {
7797 {
7798 if (pmap == kernel_pmap)
7799 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
7800 else
7801 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWRW));
7802 }
7803
7804 tmplate |= ARM_PTE_AF;
7805
7806 pte_set_ffr(tmplate, 0);
7807 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
7808
7809 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
7810 tmplate = spte | ARM_PTE_AF;
7811
7812 {
7813 pa_set_bits(pa, PP_ATTR_REFERENCED);
7814 }
7815 }
7816
7817
7818 if (spte != tmplate) {
7819 if (spte != ARM_PTE_TYPE_FAULT) {
7820 WRITE_PTE_STRONG(pte_p, tmplate);
7821 flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
7822 tlb_flush_needed = TRUE;
7823 } else {
7824 WRITE_PTE(pte_p, tmplate);
7825 __builtin_arm_isb(ISB_SY);
7826 }
7827 result = TRUE;
7828 }
7829
7830 #ifdef PVH_FLAG_IOMMU
7831 cff_skip_pve:
7832 #endif
7833 pte_p = PT_ENTRY_NULL;
7834 if (pve_p != PV_ENTRY_NULL)
7835 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7836 }
7837 if (tlb_flush_needed)
7838 sync_tlb_flush();
7839 return result;
7840 }
7841
7842 /*
7843 * Determine if the fault was induced by software tracking of
7844 * modify/reference bits. If so, re-enable the mapping (and set
7845 * the appropriate bits).
7846 *
7847 * Returns KERN_SUCCESS if the fault was induced and was
7848 * successfully handled.
7849 *
7850 * Returns KERN_FAILURE if the fault was not induced and
7851 * the function was unable to deal with it.
7852 *
7853 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
7854 * disallows this type of access.
7855 */
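/*
 * Locking, roughly: the pmap lock is taken to look up and validate the PTE,
 * then the PVH lock for the backing physical page is taken before the
 * REFFAULT/MODFAULT state is consulted and cleared.  Only managed, resident,
 * non-compressed mappings are handled here; everything else is reported back
 * to the caller as KERN_FAILURE.
 */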
7856 MARK_AS_PMAP_TEXT static kern_return_t
7857 arm_fast_fault_internal(
7858 pmap_t pmap,
7859 vm_map_address_t va,
7860 vm_prot_t fault_type,
7861 __unused boolean_t from_user)
7862 {
7863 kern_return_t result = KERN_FAILURE;
7864 pt_entry_t *ptep;
7865 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
7866 int pai;
7867 pmap_paddr_t pa;
7868
7869 VALIDATE_PMAP(pmap);
7870
7871 PMAP_LOCK(pmap);
7872
7873 /*
7874 * If the entry doesn't exist, is completely invalid, or is already
7875 * valid, we can't fix it here.
7876 */
7877
7878 ptep = pmap_pte(pmap, va);
7879 if (ptep != PT_ENTRY_NULL) {
7880 spte = *ptep;
7881
7882 pa = pte_to_pa(spte);
7883
7884 if ((spte == ARM_PTE_TYPE_FAULT) ||
7885 ARM_PTE_IS_COMPRESSED(spte)) {
7886 PMAP_UNLOCK(pmap);
7887 return result;
7888 }
7889
7890 if (!pa_valid(pa)) {
7891 PMAP_UNLOCK(pmap);
7892 return result;
7893 }
7894 pai = (int)pa_index(pa);
7895 LOCK_PVH(pai);
7896 } else {
7897 PMAP_UNLOCK(pmap);
7898 return result;
7899 }
7900
7901
7902 if ((IS_REFFAULT_PAGE(pai)) ||
7903 ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
7904 /*
7905 * An attempted access will always clear ref/mod fault state, as
7906 * appropriate for the fault type. arm_clear_fast_fault will
7907 * update the associated PTEs for the page as appropriate; if
7908 * any PTEs are updated, we redrive the access. If the mapping
7909 * does not actually allow for the attempted access, the
7910 * following fault will (hopefully) fail to update any PTEs, and
7911 * thus cause arm_fast_fault to decide that it failed to handle
7912 * the fault.
7913 */
7914 if (IS_REFFAULT_PAGE(pai)) {
7915 CLR_REFFAULT_PAGE(pai);
7916 }
7917 if ( (fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
7918 CLR_MODFAULT_PAGE(pai);
7919 }
7920
7921 if (arm_clear_fast_fault((ppnum_t)atop(pa),fault_type)) {
7922 /*
7923 * Should this preserve KERN_PROTECTION_FAILURE? The
7924 * cost of not doing so is another fault in a case
7925 * that should already result in an exception.
7926 */
7927 result = KERN_SUCCESS;
7928 }
7929 }
7930
7931 UNLOCK_PVH(pai);
7932 PMAP_UNLOCK(pmap);
7933 return result;
7934 }
7935
7936 kern_return_t
7937 arm_fast_fault(
7938 pmap_t pmap,
7939 vm_map_address_t va,
7940 vm_prot_t fault_type,
7941 __unused boolean_t from_user)
7942 {
7943 kern_return_t result = KERN_FAILURE;
7944
7945 if (va < pmap->min || va >= pmap->max)
7946 return result;
7947
7948 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
7949 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
7950 from_user);
7951
7952 #if (__ARM_VMSA__ == 7)
7953 if (pmap != kernel_pmap) {
7954 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
7955 pmap_t cur_pmap;
7956 pmap_t cur_user_pmap;
7957
7958 cur_pmap = current_pmap();
7959 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
7960
7961 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
7962 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
7963 pmap_set_pmap(pmap, current_thread());
7964 result = KERN_SUCCESS;
7965 goto done;
7966 }
7967 }
7968 }
7969 #endif
7970
7971 result = arm_fast_fault_internal(pmap, va, fault_type, from_user);
7972
7973 #if (__ARM_VMSA__ == 7)
7974 done:
7975 #endif
7976
7977 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
7978
7979 return result;
7980 }
7981
7982 void
7983 pmap_copy_page(
7984 ppnum_t psrc,
7985 ppnum_t pdst)
7986 {
7987 bcopy_phys((addr64_t) (ptoa(psrc)),
7988 (addr64_t) (ptoa(pdst)),
7989 PAGE_SIZE);
7990 }
7991
7992
7993 /*
7994 * pmap_copy_part_page copies part of the specified (machine independent) page.
7995 */
7996 void
7997 pmap_copy_part_page(
7998 ppnum_t psrc,
7999 vm_offset_t src_offset,
8000 ppnum_t pdst,
8001 vm_offset_t dst_offset,
8002 vm_size_t len)
8003 {
8004 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
8005 (addr64_t) (ptoa(pdst) + dst_offset),
8006 len);
8007 }
8008
8009
8010 /*
8011 * pmap_zero_page zeros the specified (machine independent) page.
8012 */
8013 void
8014 pmap_zero_page(
8015 ppnum_t pn)
8016 {
8017 assert(pn != vm_page_fictitious_addr);
8018 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
8019 }
8020
8021 /*
8022 * pmap_zero_part_page
8023 * zeros the specified (machine independent) part of a page.
8024 */
8025 void
8026 pmap_zero_part_page(
8027 ppnum_t pn,
8028 vm_offset_t offset,
8029 vm_size_t len)
8030 {
8031 assert(pn != vm_page_fictitious_addr);
8032 assert(offset + len <= PAGE_SIZE);
8033 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
8034 }
8035
8036
8037 /*
8038 * nop in current arm implementation
8039 */
8040 void
8041 inval_copy_windows(
8042 __unused thread_t t)
8043 {
8044 }
8045
8046 void
8047 pmap_map_globals(
8048 void)
8049 {
8050 pt_entry_t *ptep, pte;
8051
8052 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
8053 assert(ptep != PT_ENTRY_NULL);
8054 assert(*ptep == ARM_PTE_EMPTY);
8055
8056 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
8057 #if __ARM_KERNEL_PROTECT__
8058 pte |= ARM_PTE_NG;
8059 #endif /* __ARM_KERNEL_PROTECT__ */
8060 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
8061 #if (__ARM_VMSA__ > 7)
8062 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8063 #else
8064 pte |= ARM_PTE_SH;
8065 #endif
8066 *ptep = pte;
8067 FLUSH_PTE_RANGE(ptep,(ptep+1));
8068 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE);
8069 }
8070
8071 vm_offset_t
8072 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
8073 {
8074 if (__improbable(index >= CPUWINDOWS_MAX))
8075 panic("%s: invalid index %u", __func__, index);
8076 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
8077 }
8078
8079 MARK_AS_PMAP_TEXT static unsigned int
8080 pmap_map_cpu_windows_copy_internal(
8081 ppnum_t pn,
8082 vm_prot_t prot,
8083 unsigned int wimg_bits)
8084 {
8085 pt_entry_t *ptep = NULL, pte;
8086 unsigned int cpu_num;
8087 unsigned int i;
8088 vm_offset_t cpu_copywindow_vaddr = 0;
8089
8090 cpu_num = pmap_get_cpu_data()->cpu_number;
8091
8092 for (i = 0; i<CPUWINDOWS_MAX; i++) {
8093 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
8094 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8095 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
8096 if (*ptep == ARM_PTE_TYPE_FAULT)
8097 break;
8098 }
8099 if (i == CPUWINDOWS_MAX) {
8100 panic("pmap_map_cpu_windows_copy: out of window\n");
8101 }
8102
8103 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
8104 #if __ARM_KERNEL_PROTECT__
8105 pte |= ARM_PTE_NG;
8106 #endif /* __ARM_KERNEL_PROTECT__ */
8107
8108 pte |= wimg_to_pte(wimg_bits);
8109
8110 if (prot & VM_PROT_WRITE) {
8111 pte |= ARM_PTE_AP(AP_RWNA);
8112 } else {
8113 pte |= ARM_PTE_AP(AP_RONA);
8114 }
8115
8116 WRITE_PTE_FAST(ptep, pte);
8117 /*
8118 * Invalidate the TLB. This covers nested use of cpu_copywindow_vaddr by an interrupted context
8119 * in pmap_unmap_cpu_windows_copy(), after the PTE has been cleared there but before its TLB invalidate has been issued.
8120 */
8121 FLUSH_PTE_STRONG(ptep);
8122 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE);
8123
8124 return(i);
8125 }
8126
8127 unsigned int
8128 pmap_map_cpu_windows_copy(
8129 ppnum_t pn,
8130 vm_prot_t prot,
8131 unsigned int wimg_bits)
8132 {
8133 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
8134 }
8135
8136 MARK_AS_PMAP_TEXT static void
8137 pmap_unmap_cpu_windows_copy_internal(
8138 unsigned int index)
8139 {
8140 pt_entry_t *ptep;
8141 unsigned int cpu_num;
8142 vm_offset_t cpu_copywindow_vaddr = 0;
8143
8144 cpu_num = pmap_get_cpu_data()->cpu_number;
8145
8146 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
8147 /* Issue full-system DSB to ensure prior operations on the per-CPU window
8148 * (which are likely to have been on I/O memory) are complete before
8149 * tearing down the mapping. */
8150 __builtin_arm_dsb(DSB_SY);
8151 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8152 WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
8153 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE);
8154 }
8155
8156 void
8157 pmap_unmap_cpu_windows_copy(
8158 unsigned int index)
8159 {
8160 return pmap_unmap_cpu_windows_copy_internal(index);
8161 }
8162
8163 /*
8164 * Indicate that a pmap is intended to be used as a nested pmap
8165 * within one or more larger address spaces. This must be set
8166 * before pmap_nest() is called with this pmap as the 'subordinate'.
8167 */
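/*
 * Illustrative call sequence (a sketch, not taken from any caller in this file):
 *
 *	pmap_t subord = pmap_create(NULL, 0x0, is_64bit);
 *	pmap_set_nested(subord);
 *	pmap_nest(grand, subord, vstart, nstart, size);
 *
 * pmap_nest() will panic if the subordinate has not been marked nested first.
 */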
8168 MARK_AS_PMAP_TEXT static void
8169 pmap_set_nested_internal(
8170 pmap_t pmap)
8171 {
8172 VALIDATE_PMAP(pmap);
8173 pmap->nested = TRUE;
8174 }
8175
8176 void
8177 pmap_set_nested(
8178 pmap_t pmap)
8179 {
8180 pmap_set_nested_internal(pmap);
8181 }
8182
8183 /*
8184 * pmap_trim_range(pmap, start, end)
8185 *
8186 * pmap = pmap to operate on
8187 * start = start of the range
8188 * end = end of the range
8189 *
8190 * Attempts to deallocate TTEs for the given range within the nested region.
8191 */
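/*
 * The supplied range is first contracted inward to twig (L1/L2 table)
 * boundaries, since table entries can only be removed whole.  Twig TTEs whose
 * leaf page tables have a zero refcount are then deallocated (for the nested
 * pmap) or simply removed (for a parent map), and on ARM64 any L2 tables left
 * completely empty are pruned afterwards.
 */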
8192 MARK_AS_PMAP_TEXT static void
8193 pmap_trim_range(
8194 pmap_t pmap,
8195 addr64_t start,
8196 addr64_t end)
8197 {
8198 addr64_t cur;
8199 addr64_t nested_region_start;
8200 addr64_t nested_region_end;
8201 addr64_t adjusted_start;
8202 addr64_t adjusted_end;
8203 addr64_t adjust_offmask;
8204 tt_entry_t * tte_p;
8205 pt_entry_t * pte_p;
8206
8207 if (__improbable(end < start)) {
8208 panic("%s: invalid address range, "
8209 "pmap=%p, start=%p, end=%p",
8210 __func__,
8211 pmap, (void*)start, (void*)end);
8212 }
8213
8214 nested_region_start = pmap->nested ? pmap->nested_region_subord_addr : pmap->nested_region_grand_addr;
8215 nested_region_end = nested_region_start + pmap->nested_region_size;
8216
8217 if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
8218 panic("%s: range outside nested region %p-%p, "
8219 "pmap=%p, start=%p, end=%p",
8220 __func__, (void *)nested_region_start, (void *)nested_region_end,
8221 pmap, (void*)start, (void*)end);
8222 }
8223
8224 /* Contract the range to TT page boundaries. */
8225 #if (__ARM_VMSA__ > 7)
8226 adjust_offmask = ARM_TT_TWIG_OFFMASK;
8227 #else /* (__ARM_VMSA__ > 7) */
8228 adjust_offmask = ((ARM_TT_TWIG_SIZE * 4) - 1);
8229 #endif /* (__ARM_VMSA__ > 7) */
8230
8231 adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
8232 adjusted_end = end & ~adjust_offmask;
8233
8234 /* Iterate over the range, trying to remove TTEs. */
8235 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_TWIG_SIZE) {
8236 bool modified = false;
8237
8238 PMAP_LOCK(pmap);
8239
8240 tte_p = pmap_tte(pmap, cur);
8241
8242 if (tte_p == (tt_entry_t *) NULL) {
8243 goto done;
8244 }
8245
8246 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
8247 pte_p = (pt_entry_t *) ttetokv(*tte_p);
8248
8249 #if (__ARM_VMSA__ == 7)
8250 if ((ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
8251 (pmap != kernel_pmap)) {
8252 if (pmap->nested == TRUE) {
8253 /* Deallocate for the nested map. */
8254 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
8255 } else {
8256 /* Just remove for the parent map. */
8257 pmap_tte_remove(pmap, tte_p, PMAP_TT_L1_LEVEL);
8258 }
8259
8260 flush_mmu_tlb_entry((cur & ~ARM_TT_L1_OFFMASK) | (pmap->asid & 0xff));
8261 modified = true;
8262 }
8263 #else
8264 if ((ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
8265 (pmap != kernel_pmap)) {
8266 if (pmap->nested == TRUE) {
8267 /* Deallocate for the nested map. */
8268 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
8269 } else {
8270 /* Just remove for the parent map. */
8271 pmap_tte_remove(pmap, tte_p, PMAP_TT_L2_LEVEL);
8272 }
8273
8274 flush_mmu_tlb_entry(tlbi_addr(cur & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
8275 modified = true;
8276 }
8277 #endif
8278 }
8279
8280 done:
8281 PMAP_UNLOCK(pmap);
8282
8283 if (modified) {
8284 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE);
8285 }
8286 }
8287
8288 #if (__ARM_VMSA__ > 7)
8289 /* Remove empty L2 TTs. */
8290 adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
8291 adjusted_end = end & ~ARM_TT_L1_OFFMASK;
8292
8293 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
8294 /* For each L1 entry in our range... */
8295 PMAP_LOCK(pmap);
8296
8297 bool remove_tt1e = true;
8298 tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
8299 tt_entry_t * tt2e_start;
8300 tt_entry_t * tt2e_end;
8301 tt_entry_t * tt2e_p;
8302 tt_entry_t tt1e;
8303
8304 if (tt1e_p == NULL) {
8305 PMAP_UNLOCK(pmap);
8306 continue;
8307 }
8308
8309 tt1e = *tt1e_p;
8310
8311 if (tt1e == ARM_TTE_TYPE_FAULT) {
8312 PMAP_UNLOCK(pmap);
8313 continue;
8314 }
8315
8316 tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
8317 tt2e_end = &tt2e_start[TTE_PGENTRIES];
8318
8319 for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
8320 if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
8321 /*
8322 * If any TTEs are populated, don't remove the
8323 * L1 TT.
8324 */
8325 remove_tt1e = false;
8326 }
8327 }
8328
8329 if (remove_tt1e) {
8330 pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
8331 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE);
8332 }
8333
8334 PMAP_UNLOCK(pmap);
8335 }
8336 #endif /* (__ARM_VMSA__ > 7) */
8337 }
8338
8339 /*
8340 * pmap_trim_internal(grand, subord, vstart, nstart, size)
8341 *
8342 * grand = pmap subord is nested in
8343 * subord = nested pmap
8344 * vstart = start of the used range in grand
8345 * nstart = start of the used range in subord
8346 * size = size of the used range
8347 *
8348 * Attempts to trim the shared region page tables down to only cover the given
8349 * range in subord and grand.
8350 */
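/*
 * If grand still holds a "no bounds" reference (it nested before the shared
 * region bounds were known), the true bounds are recorded on subord (rounded
 * out to twig boundaries), propagated to grand, and both pmaps are then
 * trimmed back to those bounds.  If grand already has bounds, or the bounds
 * cannot be determined yet, no trimming is done here.
 */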
8351 MARK_AS_PMAP_TEXT static void
8352 pmap_trim_internal(
8353 pmap_t grand,
8354 pmap_t subord,
8355 addr64_t vstart,
8356 addr64_t nstart,
8357 uint64_t size)
8358 {
8359 addr64_t vend, nend;
8360 addr64_t adjust_offmask;
8361
8362 if (__improbable(os_add_overflow(vstart, size, &vend))) {
8363 panic("%s: grand addr wraps around, "
8364 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8365 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8366 }
8367
8368 if (__improbable(os_add_overflow(nstart, size, &nend))) {
8369 panic("%s: nested addr wraps around, "
8370 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8371 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8372 }
8373
8374 VALIDATE_PMAP(grand);
8375 VALIDATE_PMAP(subord);
8376
8377 PMAP_LOCK(subord);
8378
8379 if (!subord->nested) {
8380 panic("%s: subord is not nestable, "
8381 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8382 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8383 }
8384
8385 if (grand->nested) {
8386 panic("%s: grand is nestable, "
8387 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8388 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8389 }
8390
8391 if (grand->nested_pmap != subord) {
8392 panic("%s: grand->nested != subord, "
8393 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8394 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8395 }
8396
8397 if (size != 0) {
8398 if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
8399 panic("%s: grand range not in nested region, "
8400 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8401 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8402 }
8403
8404 if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
8405 panic("%s: subord range not in nested region, "
8406 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8407 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8408 }
8409 }
8410
8411
8412 if (!grand->nested_has_no_bounds_ref) {
8413 assert(subord->nested_bounds_set);
8414
8415 if (!grand->nested_bounds_set) {
8416 /* Inherit the bounds from subord. */
8417 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8418 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8419 grand->nested_bounds_set = true;
8420 }
8421
8422 PMAP_UNLOCK(subord);
8423 return;
8424 }
8425
8426 if ((!subord->nested_bounds_set) && size) {
8427 #if (__ARM_VMSA__ > 7)
8428 adjust_offmask = ARM_TT_TWIG_OFFMASK;
8429 #else /* (__ARM_VMSA__ > 7) */
8430 adjust_offmask = ((ARM_TT_TWIG_SIZE * 4) - 1);
8431 #endif /* (__ARM_VMSA__ > 7) */
8432
8433 subord->nested_region_true_start = nstart;
8434 subord->nested_region_true_end = nend;
8435 subord->nested_region_true_start &= ~adjust_offmask;
8436
8437 if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
8438 panic("%s: padded true end wraps around, "
8439 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8440 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8441 }
8442
8443 subord->nested_region_true_end &= ~adjust_offmask;
8444 subord->nested_bounds_set = true;
8445 }
8446
8447 if (subord->nested_bounds_set) {
8448 /* Inherit the bounds from subord. */
8449 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8450 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8451 grand->nested_bounds_set = true;
8452
8453 /* If we know the bounds, we can trim the pmap. */
8454 grand->nested_has_no_bounds_ref = false;
8455 PMAP_UNLOCK(subord);
8456 } else {
8457 /* Don't trim if we don't know the bounds. */
8458 PMAP_UNLOCK(subord);
8459 return;
8460 }
8461
8462 /* Trim grand to only cover the given range. */
8463 pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
8464 pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));
8465
8466 /* Try to trim subord. */
8467 pmap_trim_subord(subord);
8468 }
8469
8470 MARK_AS_PMAP_TEXT static void pmap_trim_self(pmap_t pmap)
8471 {
8472 if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
8473 /* If we have a no bounds ref, we need to drop it. */
8474 PMAP_LOCK(pmap->nested_pmap);
8475 pmap->nested_has_no_bounds_ref = false;
8476 boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
8477 vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
8478 vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
8479 PMAP_UNLOCK(pmap->nested_pmap);
8480
8481 if (nested_bounds_set) {
8482 pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
8483 pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
8484 }
8485 /*
8486 * Try trimming the nested pmap, in case we had the
8487 * last reference.
8488 */
8489 pmap_trim_subord(pmap->nested_pmap);
8490 }
8491 }
8492
8493 /*
8494 * pmap_trim_subord(subord)
8495 *
8496 * subord = nested pmap we are attempting to trim
8498 *
8499 * Trims subord if possible
8500 */
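/*
 * Drops one outstanding no-bounds reference on subord; when the last such
 * reference goes away and the true bounds are known, the subordinate's page
 * tables outside those bounds are released.
 */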
8501 MARK_AS_PMAP_TEXT static void
8502 pmap_trim_subord(pmap_t subord)
8503 {
8504 bool contract_subord = false;
8505
8506 PMAP_LOCK(subord);
8507
8508 subord->nested_no_bounds_refcnt--;
8509
8510 if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
8511 /* If this was the last no bounds reference, trim subord. */
8512 contract_subord = true;
8513 }
8514
8515 PMAP_UNLOCK(subord);
8516
8517 if (contract_subord) {
8518 pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
8519 pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
8520 }
8521 }
8522
8523 void
8524 pmap_trim(
8525 pmap_t grand,
8526 pmap_t subord,
8527 addr64_t vstart,
8528 addr64_t nstart,
8529 uint64_t size)
8530 {
8531 pmap_trim_internal(grand, subord, vstart, nstart, size);
8532 }
8533
8534 /*
8535 * kern_return_t pmap_nest(grand, subord, vstart, size)
8536 *
8537 * grand = the pmap that we will nest subord into
8538 * subord = the pmap that goes into the grand
8539 * vstart = start of range in grand where subord will be nested
8540 * nstart = start of the corresponding range in subord
8541 * size = Size of nest area (up to 16TB)
8542 *
8543 * Inserts a pmap into another. This is used to implement shared segments.
8544 *
8545 */
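/*
 * In outline: twig-level TTEs are first expanded in subord across the nested
 * range (honoring any known true bounds), and those TTEs are then copied into
 * grand so that both pmaps share the same leaf page tables.  A per-region
 * ASID bitmap is allocated (or grown) on subord to record which twig-sized
 * chunks are later unnested.
 */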
8546
8547 MARK_AS_PMAP_TEXT static kern_return_t
8548 pmap_nest_internal(
8549 pmap_t grand,
8550 pmap_t subord,
8551 addr64_t vstart,
8552 addr64_t nstart,
8553 uint64_t size)
8554 {
8555 kern_return_t kr = KERN_FAILURE;
8556 vm_map_offset_t vaddr, nvaddr;
8557 tt_entry_t *stte_p;
8558 tt_entry_t *gtte_p;
8559 unsigned int i;
8560 unsigned int num_tte;
8561 unsigned int nested_region_asid_bitmap_size;
8562 unsigned int* nested_region_asid_bitmap;
8563 int expand_options = 0;
8564
8565 addr64_t vend, nend;
8566 if (__improbable(os_add_overflow(vstart, size, &vend)))
8567 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
8568 if (__improbable(os_add_overflow(nstart, size, &nend)))
8569 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__, subord, nstart, size);
8570 VALIDATE_PMAP(grand);
8571 VALIDATE_PMAP(subord);
8572
8573
8574 #if (__ARM_VMSA__ == 7)
8575 if (((size|vstart|nstart) & ARM_TT_L1_PT_OFFMASK) != 0x0ULL) {
8576 return KERN_INVALID_VALUE; /* Nest 4MB region */
8577 }
8578 #else
8579 if (((size|vstart|nstart) & (ARM_TT_L2_OFFMASK)) != 0x0ULL) {
8580 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
8581 }
8582 #endif
8583
8584 if (!subord->nested)
8585 panic("%s: subordinate pmap %p is not nestable", __func__, subord);
8586
8587 if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
8588 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
8589 }
8590
8591 if (subord->nested_region_asid_bitmap == NULL) {
8592 nested_region_asid_bitmap_size = (unsigned int)(size>>ARM_TT_TWIG_SHIFT)/(sizeof(unsigned int)*NBBY);
8593
8594 nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size*sizeof(unsigned int));
8595 bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size*sizeof(unsigned int));
8596
8597 PMAP_LOCK(subord);
8598 if (subord->nested_region_asid_bitmap == NULL) {
8599 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
8600 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
8601 subord->nested_region_subord_addr = nstart;
8602 subord->nested_region_size = (mach_vm_offset_t) size;
8603 nested_region_asid_bitmap = NULL;
8604 }
8605 PMAP_UNLOCK(subord);
8606 if (nested_region_asid_bitmap != NULL) {
8607 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size*sizeof(unsigned int));
8608 }
8609 }
8610 if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
8611 uint64_t new_size;
8612 unsigned int new_nested_region_asid_bitmap_size;
8613 unsigned int* new_nested_region_asid_bitmap;
8614
8615 nested_region_asid_bitmap = NULL;
8616 nested_region_asid_bitmap_size = 0;
8617 new_size = nend - subord->nested_region_subord_addr;
8618
8619 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
8620 new_nested_region_asid_bitmap_size = (unsigned int)((new_size>>ARM_TT_TWIG_SHIFT)/(sizeof(unsigned int)*NBBY)) + 1;
8621
8622 new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size*sizeof(unsigned int));
8623 PMAP_LOCK(subord);
8624 if (subord->nested_region_size < new_size) {
8625 bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size*sizeof(unsigned int));
8626 bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
8627 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
8628 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
8629 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
8630 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
8631 subord->nested_region_size = new_size;
8632 new_nested_region_asid_bitmap = NULL;
8633 }
8634 PMAP_UNLOCK(subord);
8635 if (nested_region_asid_bitmap != NULL)
8636 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size*sizeof(unsigned int));
8637 if (new_nested_region_asid_bitmap != NULL)
8638 kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size*sizeof(unsigned int));
8639 }
8640
8641 PMAP_LOCK(subord);
8642 if (grand->nested_pmap == PMAP_NULL) {
8643 grand->nested_pmap = subord;
8644
8645 if (!subord->nested_bounds_set) {
8646 /*
8647 * We are nesting without the shared region's bounds
8648 * being known. We'll have to trim the pmap later.
8649 */
8650 grand->nested_has_no_bounds_ref = true;
8651 subord->nested_no_bounds_refcnt++;
8652 }
8653
8654 grand->nested_region_grand_addr = vstart;
8655 grand->nested_region_subord_addr = nstart;
8656 grand->nested_region_size = (mach_vm_offset_t) size;
8657 } else {
8658 if ((grand->nested_region_grand_addr > vstart)) {
8659 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
8660 }
8661 else if ((grand->nested_region_grand_addr + grand->nested_region_size) < vend) {
8662 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
8663 }
8664 }
8665
8666 #if (__ARM_VMSA__ == 7)
8667 nvaddr = (vm_map_offset_t) nstart;
8668 vaddr = (vm_map_offset_t) vstart;
8669 num_tte = size >> ARM_TT_L1_SHIFT;
8670
8671 for (i = 0; i < num_tte; i++) {
8672 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8673 goto expand_next;
8674 }
8675
8676 stte_p = pmap_tte(subord, nvaddr);
8677 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
8678 PMAP_UNLOCK(subord);
8679 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);
8680
8681 if (kr != KERN_SUCCESS) {
8682 PMAP_LOCK(grand);
8683 goto done;
8684 }
8685
8686 PMAP_LOCK(subord);
8687 }
8688 PMAP_UNLOCK(subord);
8689 PMAP_LOCK(grand);
8690 stte_p = pmap_tte(grand, vaddr);
8691 if (stte_p == (tt_entry_t *)NULL) {
8692 PMAP_UNLOCK(grand);
8693 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
8694
8695 if (kr != KERN_SUCCESS) {
8696 PMAP_LOCK(grand);
8697 goto done;
8698 }
8699 } else {
8700 PMAP_UNLOCK(grand);
8701 kr = KERN_SUCCESS;
8702 }
8703 PMAP_LOCK(subord);
8704
8705 expand_next:
8706 nvaddr += ARM_TT_L1_SIZE;
8707 vaddr += ARM_TT_L1_SIZE;
8708 }
8709
8710 #else
8711 nvaddr = (vm_map_offset_t) nstart;
8712 num_tte = (unsigned int)(size >> ARM_TT_L2_SHIFT);
8713
8714 for (i = 0; i < num_tte; i++) {
8715 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8716 goto expand_next;
8717 }
8718
8719 stte_p = pmap_tt2e(subord, nvaddr);
8720 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
8721 PMAP_UNLOCK(subord);
8722 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L3_LEVEL);
8723
8724 if (kr != KERN_SUCCESS) {
8725 PMAP_LOCK(grand);
8726 goto done;
8727 }
8728
8729 PMAP_LOCK(subord);
8730 }
8731 expand_next:
8732 nvaddr += ARM_TT_L2_SIZE;
8733 }
8734 #endif
8735 PMAP_UNLOCK(subord);
8736
8737 /*
8738 * copy TTEs from the subord pmap into the grand pmap
8739 */
8740
8741 PMAP_LOCK(grand);
8742 nvaddr = (vm_map_offset_t) nstart;
8743 vaddr = (vm_map_offset_t) vstart;
8744
8745
8746 #if (__ARM_VMSA__ == 7)
8747 for (i = 0; i < num_tte; i++) {
8748 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8749 goto nest_next;
8750 }
8751
8752 stte_p = pmap_tte(subord, nvaddr);
8753 gtte_p = pmap_tte(grand, vaddr);
8754 *gtte_p = *stte_p;
8755
8756 nest_next:
8757 nvaddr += ARM_TT_L1_SIZE;
8758 vaddr += ARM_TT_L1_SIZE;
8759 }
8760 #else
8761 for (i = 0; i < num_tte; i++) {
8762 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8763 goto nest_next;
8764 }
8765
8766 stte_p = pmap_tt2e(subord, nvaddr);
8767 gtte_p = pmap_tt2e(grand, vaddr);
8768 if (gtte_p == PT_ENTRY_NULL) {
8769 PMAP_UNLOCK(grand);
8770 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L2_LEVEL);
8771 PMAP_LOCK(grand);
8772
8773 if (kr != KERN_SUCCESS) {
8774 goto done;
8775 }
8776
8777 gtte_p = pmap_tt2e(grand, vaddr);
8778 }
8779 *gtte_p = *stte_p;
8780
8781 nest_next:
8782 vaddr += ARM_TT_L2_SIZE;
8783 nvaddr += ARM_TT_L2_SIZE;
8784 }
8785 #endif
8786
8787 kr = KERN_SUCCESS;
8788 done:
8789
8790 stte_p = pmap_tte(grand, vstart);
8791 FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
8792
8793 #if (__ARM_VMSA__ > 7)
8794 /*
8795 * check for overflow on LP64 arch
8796 */
8797 assert((size & 0xFFFFFFFF00000000ULL) == 0);
8798 #endif
8799 PMAP_UPDATE_TLBS(grand, vstart, vend);
8800
8801 PMAP_UNLOCK(grand);
8802 return kr;
8803 }
8804
8805 kern_return_t pmap_nest(
8806 pmap_t grand,
8807 pmap_t subord,
8808 addr64_t vstart,
8809 addr64_t nstart,
8810 uint64_t size)
8811 {
8812 kern_return_t kr = KERN_FAILURE;
8813
8814 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
8815 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
8816 VM_KERNEL_ADDRHIDE(vstart));
8817
8818 kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
8819
8820 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
8821
8822 return kr;
8823 }
8824
8825 /*
8826 * kern_return_t pmap_unnest(grand, vaddr, size)
8827 *
8828 * grand = the pmap that will have the virtual range unnested
8829 * vaddr = start of range in pmap to be unnested
8830 * size = size of range in pmap to be unnested
8831 *
8832 */
8833
8834 kern_return_t
8835 pmap_unnest(
8836 pmap_t grand,
8837 addr64_t vaddr,
8838 uint64_t size)
8839 {
8840 return(pmap_unnest_options(grand, vaddr, size, 0));
8841 }
8842
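/*
 * Unnesting, roughly: unless PMAP_UNNEST_CLEAN is passed, the affected twigs
 * in the shared (nested) pmap first have their PTEs marked non-global, with
 * the fact recorded in the nested region's ASID bitmap, so stale global TLB
 * entries cannot leak between address spaces.  In all cases the corresponding
 * twig TTEs in grand are then cleared and the TLB entries for the range are
 * flushed.
 */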
8843 MARK_AS_PMAP_TEXT static kern_return_t
8844 pmap_unnest_options_internal(
8845 pmap_t grand,
8846 addr64_t vaddr,
8847 uint64_t size,
8848 unsigned int option)
8849 {
8850 vm_map_offset_t start;
8851 vm_map_offset_t addr;
8852 tt_entry_t *tte_p;
8853 unsigned int current_index;
8854 unsigned int start_index;
8855 unsigned int max_index;
8856 unsigned int num_tte;
8857 unsigned int i;
8858
8859 addr64_t vend;
8860 if (__improbable(os_add_overflow(vaddr, size, &vend)))
8861 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
8862
8863 VALIDATE_PMAP(grand);
8864
8865 #if (__ARM_VMSA__ == 7)
8866 if (((size|vaddr) & ARM_TT_L1_PT_OFFMASK) != 0x0ULL) {
8867 panic("pmap_unnest(): unaligned request\n");
8868 }
8869 #else
8870 if (((size|vaddr) & ARM_TT_L2_OFFMASK) != 0x0ULL) {
8871 panic("pmap_unnest(): unaligned request\n");
8872 }
8873 #endif
8874
8875 if ((option & PMAP_UNNEST_CLEAN) == 0)
8876 {
8877 if (grand->nested_pmap == NULL)
8878 panic("%s: %p has no nested pmap", __func__, grand);
8879
8880 if ((vaddr < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size)))
8881 panic("%s: %p: unnest request for not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
8882
8883 PMAP_LOCK(grand->nested_pmap);
8884
8885 start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr ;
8886 start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> ARM_TT_TWIG_SHIFT);
8887 max_index = (unsigned int)(start_index + (size >> ARM_TT_TWIG_SHIFT));
8888 num_tte = (unsigned int)(size >> ARM_TT_TWIG_SHIFT);
8889
8890 for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += ARM_TT_TWIG_SIZE) {
8891 pt_entry_t *bpte, *epte, *cpte;
8892
8893 if (addr < grand->nested_pmap->nested_region_true_start) {
8894 /* We haven't reached the interesting range. */
8895 continue;
8896 }
8897
8898 if (addr >= grand->nested_pmap->nested_region_true_end) {
8899 /* We're done with the interesting range. */
8900 break;
8901 }
8902
8903 bpte = pmap_pte(grand->nested_pmap, addr);
8904 epte = bpte + (ARM_TT_LEAF_INDEX_MASK>>ARM_TT_LEAF_SHIFT);
8905
8906 if(!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
8907 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
8908
8909 for (cpte = bpte; cpte <= epte; cpte++) {
8910 pmap_paddr_t pa;
8911 int pai=0;
8912 boolean_t managed=FALSE;
8913 pt_entry_t spte;
8914
8915 if ((*cpte != ARM_PTE_TYPE_FAULT)
8916 && (!ARM_PTE_IS_COMPRESSED(*cpte))) {
8917
8918 spte = *cpte;
8919 while (!managed) {
8920 pa = pte_to_pa(spte);
8921 if (!pa_valid(pa))
8922 break;
8923 pai = (int)pa_index(pa);
8924 LOCK_PVH(pai);
8925 spte = *cpte;
8926 pa = pte_to_pa(spte);
8927 if (pai == (int)pa_index(pa)) {
8928 managed =TRUE;
8929 break; // Leave the PVH locked as we'll unlock it after we update the PTE
8930 }
8931 UNLOCK_PVH(pai);
8932 }
8933
8934 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
8935
8936 WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
8937 }
8938
8939 if (managed)
8940 {
8941 ASSERT_PVH_LOCKED(pai);
8942 UNLOCK_PVH(pai);
8943 }
8944 }
8945 }
8946 }
8947
8948 FLUSH_PTE_RANGE_STRONG(bpte, epte);
8949 flush_mmu_tlb_region_asid_async(start, (unsigned)size, grand->nested_pmap);
8950 }
8951
8952 sync_tlb_flush();
8953
8954 PMAP_UNLOCK(grand->nested_pmap);
8955 }
8956
8957 PMAP_LOCK(grand);
8958
8959 /*
8960 * invalidate all twig TTEs for the segment at vaddr in pmap grand
8961 */
8962 start = vaddr;
8963 addr = vaddr;
8964
8965 num_tte = (unsigned int)(size >> ARM_TT_TWIG_SHIFT);
8966
8967 for (i = 0; i < num_tte; i++, addr += ARM_TT_TWIG_SIZE) {
8968 if (addr < grand->nested_pmap->nested_region_true_start) {
8969 /* We haven't reached the interesting range. */
8970 continue;
8971 }
8972
8973 if (addr >= grand->nested_pmap->nested_region_true_end) {
8974 /* We're done with the interesting range. */
8975 break;
8976 }
8977
8978 tte_p = pmap_tte(grand, addr);
8979 *tte_p = ARM_TTE_TYPE_FAULT;
8980 }
8981
8982 tte_p = pmap_tte(grand, start);
8983 FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
8984 PMAP_UPDATE_TLBS(grand, start, vend);
8985
8986 PMAP_UNLOCK(grand);
8987
8988 return KERN_SUCCESS;
8989 }
8990
8991 kern_return_t
8992 pmap_unnest_options(
8993 pmap_t grand,
8994 addr64_t vaddr,
8995 uint64_t size,
8996 unsigned int option)
8997 {
8998 kern_return_t kr = KERN_FAILURE;
8999
9000 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
9001 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
9002
9003 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
9004
9005 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
9006
9007 return kr;
9008 }
9009
9010 boolean_t
9011 pmap_adjust_unnest_parameters(
9012 __unused pmap_t p,
9013 __unused vm_map_offset_t *s,
9014 __unused vm_map_offset_t *e)
9015 {
9016 return TRUE; /* to get to log_unnest_badness()... */
9017 }
9018
9019 /*
9020 * disable no-execute capability on
9021 * the specified pmap
9022 */
9023 #if DEVELOPMENT || DEBUG
9024 void
9025 pmap_disable_NX(
9026 pmap_t pmap)
9027 {
9028 pmap->nx_enabled = FALSE;
9029 }
9030 #else
9031 void
9032 pmap_disable_NX(
9033 __unused pmap_t pmap)
9034 {
9035 }
9036 #endif
9037
9038 void
9039 pt_fake_zone_init(
9040 int zone_index)
9041 {
9042 pt_fake_zone_index = zone_index;
9043 }
9044
9045 void
9046 pt_fake_zone_info(
9047 int *count,
9048 vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
9049 uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
9050 {
9051 *count = inuse_pmap_pages_count;
9052 *cur_size = PAGE_SIZE * (inuse_pmap_pages_count);
9053 *max_size = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
9054 *elem_size = PAGE_SIZE;
9055 *alloc_size = PAGE_SIZE;
9056 *sum_size = (alloc_pmap_pages_count) * PAGE_SIZE;
9057
9058 *collectable = 1;
9059 *exhaustable = 0;
9060 *caller_acct = 1;
9061 }
9062
9063 /*
9064 * flush a range of hardware TLB entries.
9065 * NOTE: assumes the smallest TLB entry in use will be for
9066 * an ARM small page (4K).
9067 */
9068
9069 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
9070 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
9071
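/*
 * If the range covers more small pages than the threshold above, it is
 * cheaper to invalidate the whole TLB (or just the pmap's ASID) than to issue
 * per-page invalidates; otherwise individual entries are invalidated, with
 * nested (shared-region) pmaps handled separately because their mappings are
 * shared across address spaces.
 */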
9072 static void
9073 flush_mmu_tlb_region_asid_async(
9074 vm_offset_t va,
9075 unsigned length,
9076 pmap_t pmap)
9077 {
9078 #if (__ARM_VMSA__ == 7)
9079 vm_offset_t end = va + length;
9080 uint32_t asid;
9081
9082 asid = pmap->asid;
9083
9084 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
9085 boolean_t flush_all = FALSE;
9086
9087 if ((asid == 0) || (pmap->nested == TRUE))
9088 flush_all = TRUE;
9089 if (flush_all)
9090 flush_mmu_tlb_async();
9091 else
9092 flush_mmu_tlb_asid_async(asid);
9093
9094 return;
9095 }
9096 if (pmap->nested == TRUE) {
9097 #if !__ARM_MP_EXT__
9098 flush_mmu_tlb();
9099 #else
9100 va = arm_trunc_page(va);
9101 while (va < end) {
9102 flush_mmu_tlb_mva_entries_async(va);
9103 va += ARM_SMALL_PAGE_SIZE;
9104 }
9105 #endif
9106 return;
9107 }
9108 va = arm_trunc_page(va) | (asid & 0xff);
9109 flush_mmu_tlb_entries_async(va, end);
9110
9111 #else
9112 vm_offset_t end = va + length;
9113 uint32_t asid;
9114
9115 asid = pmap->asid;
9116
9117 if ((length >> ARM_TT_L3_SHIFT) > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
9118 boolean_t flush_all = FALSE;
9119
9120 if ((asid == 0) || (pmap->nested == TRUE))
9121 flush_all = TRUE;
9122 if (flush_all)
9123 flush_mmu_tlb_async();
9124 else
9125 flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
9126 return;
9127 }
9128 va = tlbi_asid(asid) | tlbi_addr(va);
9129 end = tlbi_asid(asid) | tlbi_addr(end);
9130 if (pmap->nested == TRUE) {
9131 flush_mmu_tlb_allentries_async(va, end);
9132 } else {
9133 flush_mmu_tlb_entries_async(va, end);
9134 }
9135
9136 #endif
9137 }
9138
9139 void
9140 flush_mmu_tlb_region(
9141 vm_offset_t va,
9142 unsigned length)
9143 {
9144 flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
9145 sync_tlb_flush();
9146 }
9147
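/*
 * Binary-search the sorted io_attr_table for an I/O range containing the
 * given physical address and return its WIMG attributes; unmatched addresses
 * default to VM_WIMG_IO.
 */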
9148 static unsigned int
9149 pmap_find_io_attr(pmap_paddr_t paddr)
9150 {
9151 pmap_io_range_t find_range = {.addr = paddr, .len = PAGE_SIZE};
9152 unsigned int begin = 0, end = num_io_rgns - 1;
9153 assert(num_io_rgns > 0);
9154
9155 for (;;) {
9156 unsigned int middle = (begin + end) / 2;
9157 int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
9158 if (cmp == 0)
9159 return io_attr_table[middle].wimg;
9160 else if (begin == end)
9161 break;
9162 else if (cmp > 0)
9163 begin = middle + 1;
9164 else
9165 end = middle;
9166 };
9167
9168 return (VM_WIMG_IO);
9169 }
9170
9171 unsigned int
9172 pmap_cache_attributes(
9173 ppnum_t pn)
9174 {
9175 pmap_paddr_t paddr;
9176 int pai;
9177 unsigned int result;
9178 pp_attr_t pp_attr_current;
9179
9180 paddr = ptoa(pn);
9181
9182 if ((paddr >= io_rgn_start) && (paddr < io_rgn_end))
9183 return pmap_find_io_attr(paddr);
9184
9185 if (!pmap_initialized) {
9186 if ((paddr >= gPhysBase) && (paddr < gPhysBase+gPhysSize))
9187 return (VM_WIMG_DEFAULT);
9188 else
9189 return (VM_WIMG_IO);
9190 }
9191
9192
9193 if (!pa_valid(paddr))
9194 return (VM_WIMG_IO);
9195
9196 result = VM_WIMG_DEFAULT;
9197
9198 pai = (int)pa_index(paddr);
9199
9200 pp_attr_current = pp_attr_table[pai];
9201 if (pp_attr_current & PP_ATTR_WIMG_MASK)
9202 result = pp_attr_current & PP_ATTR_WIMG_MASK;
9203 return result;
9204 }
9205
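/*
 * Called once per page of a batch.  With doit == FALSE this is, roughly, a
 * dry-run pass: *res tracks how many pages in the batch actually change
 * attributes, and platform_cache_batch_wimg() is consulted to decide whether
 * a single batched cache operation is acceptable.  With doit == TRUE the new
 * WIMG bits are applied under the PVH lock and the batched cache flush is
 * issued once the final page (page_index + 1 == page_cnt) has been processed.
 */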
9206 MARK_AS_PMAP_TEXT static boolean_t
9207 pmap_batch_set_cache_attributes_internal(
9208 ppnum_t pn,
9209 unsigned int cacheattr,
9210 unsigned int page_cnt,
9211 unsigned int page_index,
9212 boolean_t doit,
9213 unsigned int *res)
9214 {
9215 pmap_paddr_t paddr;
9216 int pai;
9217 pp_attr_t pp_attr_current;
9218 pp_attr_t pp_attr_template;
9219 unsigned int wimg_bits_prev, wimg_bits_new;
9220
9221 if (cacheattr & VM_WIMG_USE_DEFAULT)
9222 cacheattr = VM_WIMG_DEFAULT;
9223
9224 if ((doit == FALSE) && (*res == 0)) {
9225 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
9226 *res = page_cnt;
9227 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
9228 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt<<PAGE_SHIFT) == FALSE) {
9229 return FALSE;
9230 }
9231 }
9232
9233 paddr = ptoa(pn);
9234
9235 if (!pa_valid(paddr)) {
9236 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
9237 }
9238
9239 pai = (int)pa_index(paddr);
9240
9241 if (doit) {
9242 LOCK_PVH(pai);
9243 }
9244
9245 do {
9246 pp_attr_current = pp_attr_table[pai];
9247 wimg_bits_prev = VM_WIMG_DEFAULT;
9248 if (pp_attr_current & PP_ATTR_WIMG_MASK)
9249 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
9250
9251 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
9252
9253 if (!doit)
9254 break;
9255
9256 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9257 * to avoid losing simultaneous updates to other bits like refmod. */
9258 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
9259
9260 wimg_bits_new = VM_WIMG_DEFAULT;
9261 if (pp_attr_template & PP_ATTR_WIMG_MASK)
9262 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
9263
9264 if (doit) {
9265 if (wimg_bits_new != wimg_bits_prev)
9266 pmap_update_cache_attributes_locked(pn, cacheattr);
9267 UNLOCK_PVH(pai);
9268 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT))
9269 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
9270 } else {
9271 if (wimg_bits_new == VM_WIMG_COPYBACK) {
9272 return FALSE;
9273 }
9274 if (wimg_bits_prev == wimg_bits_new) {
9275 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
9276 *res = *res-1;
9277 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
9278 if (!platform_cache_batch_wimg(wimg_bits_new, (*res)<<PAGE_SHIFT)) {
9279 return FALSE;
9280 }
9281 }
9282 return TRUE;
9283 }
9284
9285 if (page_cnt == (page_index+1)) {
9286 wimg_bits_prev = VM_WIMG_COPYBACK;
9287 if (((wimg_bits_prev != wimg_bits_new))
9288 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
9289 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
9290 && (wimg_bits_new != VM_WIMG_COPYBACK))
9291 || ((wimg_bits_prev == VM_WIMG_WTHRU)
9292 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
9293 platform_cache_flush_wimg(wimg_bits_new);
9294 }
9295 }
9296
9297 return TRUE;
9298 };
9299
9300 boolean_t
9301 pmap_batch_set_cache_attributes(
9302 ppnum_t pn,
9303 unsigned int cacheattr,
9304 unsigned int page_cnt,
9305 unsigned int page_index,
9306 boolean_t doit,
9307 unsigned int *res)
9308 {
9309 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
9310 }
9311
9312 MARK_AS_PMAP_TEXT static void
9313 pmap_set_cache_attributes_priv(
9314 ppnum_t pn,
9315 unsigned int cacheattr,
9316 boolean_t external __unused)
9317 {
9318 pmap_paddr_t paddr;
9319 int pai;
9320 pp_attr_t pp_attr_current;
9321 pp_attr_t pp_attr_template;
9322 unsigned int wimg_bits_prev, wimg_bits_new;
9323
9324 paddr = ptoa(pn);
9325
9326 if (!pa_valid(paddr)) {
9327 return; /* Not a managed page. */
9328 }
9329
9330 if (cacheattr & VM_WIMG_USE_DEFAULT)
9331 cacheattr = VM_WIMG_DEFAULT;
9332
9333 pai = (int)pa_index(paddr);
9334
9335 LOCK_PVH(pai);
9336
9337
9338 do {
9339 pp_attr_current = pp_attr_table[pai];
9340 wimg_bits_prev = VM_WIMG_DEFAULT;
9341 if (pp_attr_current & PP_ATTR_WIMG_MASK)
9342 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
9343
9344 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK)) ;
9345
9346 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9347 * to avoid losing simultaneous updates to other bits like refmod. */
9348 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
9349
9350 wimg_bits_new = VM_WIMG_DEFAULT;
9351 if (pp_attr_template & PP_ATTR_WIMG_MASK)
9352 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
9353
9354 if (wimg_bits_new != wimg_bits_prev)
9355 pmap_update_cache_attributes_locked(pn, cacheattr);
9356
9357 UNLOCK_PVH(pai);
9358
9359 if ((wimg_bits_prev != wimg_bits_new)
9360 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
9361 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
9362 && (wimg_bits_new != VM_WIMG_COPYBACK))
9363 || ((wimg_bits_prev == VM_WIMG_WTHRU)
9364 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK)))))
9365 pmap_sync_page_attributes_phys(pn);
9366
9367 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT))
9368 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
9369 }
9370
9371 MARK_AS_PMAP_TEXT static void
9372 pmap_set_cache_attributes_internal(
9373 ppnum_t pn,
9374 unsigned int cacheattr)
9375 {
9376 pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
9377 }
9378
9379 void
9380 pmap_set_cache_attributes(
9381 ppnum_t pn,
9382 unsigned int cacheattr)
9383 {
9384 pmap_set_cache_attributes_internal(pn, cacheattr);
9385 }
9386
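/*
 * Rewrite the cacheability attributes for every mapping of the given physical
 * page: the kernel's physical-aperture PTE (when present) and each PTE
 * reachable from the page's PV list, flushing the affected TLB entries once
 * at the end.  As the _locked suffix suggests, callers hold the page's PVH
 * lock across this call.
 */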
9387 void
9388 pmap_update_cache_attributes_locked(
9389 ppnum_t ppnum,
9390 unsigned attributes)
9391 {
9392 pmap_paddr_t phys = ptoa(ppnum);
9393 pv_entry_t *pve_p;
9394 pt_entry_t *pte_p;
9395 pv_entry_t **pv_h;
9396 pt_entry_t tmplate;
9397 unsigned int pai;
9398 boolean_t tlb_flush_needed = FALSE;
9399
9400 #if __ARM_PTE_PHYSMAP__
9401 vm_offset_t kva = phystokv(phys);
9402 pte_p = pmap_pte(kernel_pmap, kva);
9403
9404 tmplate = *pte_p;
9405 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
9406 tmplate |= wimg_to_pte(attributes);
9407 #if (__ARM_VMSA__ > 7)
9408 if (tmplate & ARM_PTE_HINT_MASK) {
9409 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
9410 __FUNCTION__, pte_p, (void *)kva, tmplate);
9411 }
9412 #endif
9413 WRITE_PTE_STRONG(pte_p, tmplate);
9414 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
9415 tlb_flush_needed = TRUE;
9416 #endif
9417
9418 pai = (unsigned int)pa_index(phys);
9419
9420 pv_h = pai_to_pvh(pai);
9421
9422 pte_p = PT_ENTRY_NULL;
9423 pve_p = PV_ENTRY_NULL;
9424 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
9425 pte_p = pvh_ptep(pv_h);
9426 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
9427 pve_p = pvh_list(pv_h);
9428 pte_p = PT_ENTRY_NULL;
9429 }
9430
9431 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
9432 vm_map_address_t va;
9433 pmap_t pmap;
9434
9435 if (pve_p != PV_ENTRY_NULL)
9436 pte_p = pve_get_ptep(pve_p);
9437 #ifdef PVH_FLAG_IOMMU
9438 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU)
9439 goto cache_skip_pve;
9440 #endif
9441 pmap = ptep_get_pmap(pte_p);
9442 va = ptep_get_va(pte_p);
9443
9444 tmplate = *pte_p;
9445 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
9446 tmplate |= wimg_to_pte(attributes);
9447
9448 WRITE_PTE_STRONG(pte_p, tmplate);
9449 flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
9450 tlb_flush_needed = TRUE;
9451
9452 #ifdef PVH_FLAG_IOMMU
9453 cache_skip_pve:
9454 #endif
9455 pte_p = PT_ENTRY_NULL;
9456 if (pve_p != PV_ENTRY_NULL)
9457 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
9458
9459 }
9460 if (tlb_flush_needed)
9461 sync_tlb_flush();
9462 }
9463
9464 #if (__ARM_VMSA__ == 7)
9465 vm_map_address_t
9466 pmap_create_sharedpage(
9467 void)
9468 {
9469 pmap_paddr_t pa;
9470 kern_return_t kr;
9471
9472 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
9473 memset((char *) phystokv(pa), 0, PAGE_SIZE);
9474
9475 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9476 assert(kr == KERN_SUCCESS);
9477
9478 return((vm_map_address_t)phystokv(pa));
9479
9480 }
9481 #else
9482 static void
9483 pmap_update_tt3e(
9484 pmap_t pmap,
9485 vm_address_t address,
9486 tt_entry_t template)
9487 {
9488 tt_entry_t *ptep, pte;
9489
9490 ptep = pmap_tt3e(pmap, address);
9491 if (ptep == NULL) {
9492 panic("%s: no ptep?\n", __FUNCTION__);
9493 }
9494
9495 pte = *ptep;
9496 pte = tte_to_pa(pte) | template;
9497 WRITE_PTE_STRONG(ptep, pte);
9498 }
9499
9500 /* Note absence of non-global bit */
9501 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
9502 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
9503 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
9504 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
9505
9506 vm_map_address_t
9507 pmap_create_sharedpage(
9508 void
9509 )
9510 {
9511 kern_return_t kr;
9512 pmap_paddr_t pa = 0;
9513
9514
9515 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
9516
9517 memset((char *) phystokv(pa), 0, PAGE_SIZE);
9518
9519 #ifdef CONFIG_XNUPOST
9520 /*
9521 * The kernel pmap maintains a user accessible mapping of the commpage
9522 * to test PAN.
9523 */
9524 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9525 assert(kr == KERN_SUCCESS);
9526
9527 /*
9528 * This mapping should not be global (as we only expect to reference it
9529 * during testing).
9530 */
9531 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
9532
9533 #if KASAN
9534 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
9535 #endif
9536 #endif /* CONFIG_XNUPOST */
9537
9538 /*
9539 * In order to avoid burning extra pages on mapping the shared page, we
9540 * create a dedicated pmap for the shared page. We forcibly nest the
9541 * translation tables from this pmap into other pmaps. The level we
9542 * will nest at depends on the MMU configuration (page size, TTBR range,
9543 * etc).
9544 *
9545 * Note that this is NOT "the nested pmap" (which is used to nest the
9546 * shared cache).
9547 *
9548 * Note that we update parameters of the entry for our unique needs (NG
9549 * entry, etc.).
9550 */
9551 sharedpage_pmap = pmap_create(NULL, 0x0, FALSE);
9552 assert(sharedpage_pmap != NULL);
9553
9554 /* The user 64-bit mapping... */
9555 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9556 assert(kr == KERN_SUCCESS);
9557 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
9558
9559 /* ...and the user 32-bit mapping. */
9560 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9561 assert(kr == KERN_SUCCESS);
9562 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
9563
9564 /* For manipulation in kernel, go straight to physical page */
9565 return ((vm_map_address_t)phystokv(pa));
9566 }
9567
9568 /*
9569 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
9570 * with user controlled TTEs.
9571 */
9572 #if (ARM_PGSHIFT == 14) || __ARM64_TWO_LEVEL_PMAP__
9573 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
9574 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
9575 #elif (ARM_PGSHIFT == 12)
9576 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
9577 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
9578 #else
9579 #error Nested shared page mapping is unsupported on this config
9580 #endif
9581
9582 MARK_AS_PMAP_TEXT static kern_return_t
9583 pmap_insert_sharedpage_internal(
9584 pmap_t pmap)
9585 {
9586 kern_return_t kr = KERN_SUCCESS;
9587 vm_offset_t sharedpage_vaddr;
9588 pt_entry_t *ttep, *src_ttep;
9589 int options = 0;
9590
9591 VALIDATE_PMAP(pmap);
9592
9593 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
9594 #error We assume a single page.
9595 #endif
9596
9597 if (pmap_is_64bit(pmap)) {
9598 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
9599 } else {
9600 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
9601 }
9602
9603 PMAP_LOCK(pmap);
9604
9605 /*
9606 * For 4KB pages, we can force the commpage to nest at the level one
9607 * page table, as each entry is 1GB (i.e, there will be no overlap
9608 * with regular userspace mappings). For 16KB pages, each level one
9609 * entry is 64GB, so we must go to the second level entry (32MB) in
9610 * order to nest.
9611 */
9612 #if (ARM_PGSHIFT == 12)
9613 #if __ARM64_TWO_LEVEL_PMAP__
9614 #error A two level page table with a page shift of 12 is not currently supported
9615 #endif
9616 (void)options;
9617
9618 /* Just slam in the L1 entry. */
9619 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
9620
9621 if (*ttep != ARM_PTE_EMPTY) {
9622 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
9623 }
9624
9625 src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
9626 #elif (ARM_PGSHIFT == 14)
9627 #if !__ARM64_TWO_LEVEL_PMAP__
9628 /* Allocate for the L2 entry if necessary, and slam it into place. */
9629 /*
9630 * As long as we are using a three level page table, the first level
9631 * should always exist, so we don't need to check for it.
9632 */
9633 while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
9634 PMAP_UNLOCK(pmap);
9635
9636 kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);
9637
9638 if (kr != KERN_SUCCESS) {
9639 {
9640 panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
9641 }
9642 }
9643
9644 PMAP_LOCK(pmap);
9645 }
9646 #endif
9647
9648 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
9649
9650 if (*ttep != ARM_PTE_EMPTY) {
9651 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
9652 }
9653
9654 src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
9655 #endif
9656
9657 *ttep = *src_ttep;
9658 FLUSH_PTE_STRONG(ttep);
9659
9660 /* TODO: Should we flush in the 64-bit case? */
9661 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
9662
9663 #if (ARM_PGSHIFT == 12) && !__ARM64_TWO_LEVEL_PMAP__
9664 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->asid));
9665 #elif (ARM_PGSHIFT == 14)
9666 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
9667 #endif
9668 sync_tlb_flush();
9669
9670 PMAP_UNLOCK(pmap);
9671
9672 return kr;
9673 }
9674
9675 static void
9676 pmap_unmap_sharedpage(
9677 pmap_t pmap)
9678 {
9679 pt_entry_t *ttep;
9680 vm_offset_t sharedpage_vaddr;
9681
9682 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
9683 #error We assume a single page.
9684 #endif
9685
9686 if (pmap_is_64bit(pmap)) {
9687 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
9688 } else {
9689 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
9690 }
9691
9692 #if (ARM_PGSHIFT == 12)
9693 #if __ARM64_TWO_LEVEL_PMAP__
9694 #error A two level page table with a page shift of 12 is not currently supported
9695 #endif
9696 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
9697
9698 if (ttep == NULL) {
9699 return;
9700 }
9701
9702 /* It had better be mapped to the shared page */
9703 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
9704 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
9705 }
9706 #elif (ARM_PGSHIFT == 14)
9707 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
9708
9709 if (ttep == NULL) {
9710 return;
9711 }
9712
9713 /* It had better be mapped to the shared page */
9714 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
9715 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
9716 }
9717 #endif
9718
9719 *ttep = ARM_TTE_EMPTY;
9720 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
9721
9722 #if (ARM_PGSHIFT == 12)
9723 #if __ARM64_TWO_LEVEL_PMAP__
9724 #error A two level page table with a page shift of 12 is not currently supported
9725 #endif
9726 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->asid));
9727 #elif (ARM_PGSHIFT == 14)
9728 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
9729 #endif
9730 sync_tlb_flush();
9731 }
9732
9733 void
9734 pmap_insert_sharedpage(
9735 pmap_t pmap)
9736 {
9737 pmap_insert_sharedpage_internal(pmap);
9738 }
9739
9740 static boolean_t
9741 pmap_is_64bit(
9742 pmap_t pmap)
9743 {
9744 return (pmap->is_64bit);
9745 }
9746
9747 #endif
9748
9749 /* ARMTODO -- provide an implementation that accounts for
9750 * holes in the physical map, if any.
9751 */
9752 boolean_t
9753 pmap_valid_page(
9754 ppnum_t pn) {
9755 return pa_valid(ptoa(pn));
9756 }
9757
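/*
 * Returns TRUE if no physical pages are mapped in [va_start, va_end).
 * The range is walked one twig entry at a time and the leaf entries
 * underneath are checked; for user pmaps outside the kernel debugger the
 * pmap lock is held while walking.
 */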
9758 MARK_AS_PMAP_TEXT static boolean_t
9759 pmap_is_empty_internal(
9760 pmap_t pmap,
9761 vm_map_offset_t va_start,
9762 vm_map_offset_t va_end)
9763 {
9764 vm_map_offset_t block_start, block_end;
9765 tt_entry_t *tte_p;
9766
9767 if (pmap == NULL) {
9768 return TRUE;
9769 }
9770
9771 VALIDATE_PMAP(pmap);
9772
9773 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9774 PMAP_LOCK(pmap);
9775 }
9776
9777 #if (__ARM_VMSA__ == 7)
9778 if (tte_index(pmap, va_end) >= pmap->tte_index_max) {
9779 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9780 PMAP_UNLOCK(pmap);
9781 }
9782 return TRUE;
9783 }
9784
9785 block_start = va_start;
9786 tte_p = pmap_tte(pmap, block_start);
9787 while (block_start < va_end) {
9788 block_end = (block_start + ARM_TT_L1_SIZE) & ~(ARM_TT_L1_OFFMASK);
9789 if (block_end > va_end)
9790 block_end = va_end;
9791
9792 if ((*tte_p & ARM_TTE_TYPE_MASK) != 0) {
9793 vm_map_offset_t offset;
9794 ppnum_t phys_page = 0;
9795
9796 for (offset = block_start;
9797 offset < block_end;
9798 offset += ARM_PGBYTES) {
9799 // This does a pmap_find_phys() lookup but assumes lock is held
9800 phys_page = pmap_vtophys(pmap, offset);
9801 if (phys_page) {
9802 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9803 PMAP_UNLOCK(pmap);
9804 }
9805 return FALSE;
9806 }
9807 }
9808 }
9809
9810 block_start = block_end;
9811 tte_p++;
9812 }
9813 #else
9814 block_start = va_start;
9815
9816 while (block_start < va_end) {
9817 pt_entry_t *bpte_p, *epte_p;
9818 pt_entry_t *pte_p;
9819
9820 block_end = (block_start + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK;
9821 if (block_end > va_end)
9822 block_end = va_end;
9823
9824 tte_p = pmap_tt2e(pmap, block_start);
9825 if ((tte_p != PT_ENTRY_NULL)
9826 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
9827
9828 pte_p = (pt_entry_t *) ttetokv(*tte_p);
9829 bpte_p = &pte_p[tt3_index(pmap, block_start)];
9830 epte_p = bpte_p + (((block_end - block_start) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
9831
9832 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
9833 if (*pte_p != ARM_PTE_EMPTY) {
9834 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9835 PMAP_UNLOCK(pmap);
9836 }
9837 return FALSE;
9838 }
9839 }
9840 }
9841 block_start = block_end;
9842 }
9843 #endif
9844
9845 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9846 PMAP_UNLOCK(pmap);
9847 }
9848
9849 return TRUE;
9850 }
9851
9852 boolean_t
9853 pmap_is_empty(
9854 pmap_t pmap,
9855 vm_map_offset_t va_start,
9856 vm_map_offset_t va_end)
9857 {
9858 return pmap_is_empty_internal(pmap, va_start, va_end);
9859 }
9860
9861 vm_map_offset_t pmap_max_offset(
9862 boolean_t is64,
9863 unsigned int option)
9864 {
9865 return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
9866 }
9867
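/*
 * Map an ARM_PMAP_MAX_OFFSET_* option to the highest user VA a 64-bit pmap
 * may use: DEFAULT honors the arm64_pmap_max_offset_default boot-arg, MIN is
 * the end of the shared region plus 512MB, MAX is MACH_VM_MAX_ADDRESS,
 * DEVICE scales with the amount of physical memory, and JUMBO is
 * MACH_VM_MAX_ADDRESS unless overridden by the boot-arg.
 */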
9868 vm_map_offset_t pmap_max_64bit_offset(
9869 __unused unsigned int option)
9870 {
9871 vm_map_offset_t max_offset_ret = 0;
9872
9873 #if defined(__arm64__)
9874 const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
9875 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
9876 max_offset_ret = arm64_pmap_max_offset_default;
9877 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
9878 max_offset_ret = min_max_offset;
9879 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
9880 max_offset_ret = MACH_VM_MAX_ADDRESS;
9881 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
9882 if (arm64_pmap_max_offset_default) {
9883 max_offset_ret = arm64_pmap_max_offset_default;
9884 } else if (max_mem > 0xC0000000) {
9885 max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
9886 } else if (max_mem > 0x40000000) {
9887 max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
9888 } else {
9889 max_offset_ret = min_max_offset;
9890 }
9891 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
9892 if (arm64_pmap_max_offset_default) {
9893 // Allow the boot-arg to override jumbo size
9894 max_offset_ret = arm64_pmap_max_offset_default;
9895 } else {
9896 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
9897 }
9898 } else {
9899 panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
9900 }
9901
9902 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
9903 assert(max_offset_ret >= min_max_offset);
9904 #else
9905 panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
9906 #endif
9907
9908 return max_offset_ret;
9909 }
9910
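/*
 * 32-bit counterpart of pmap_max_64bit_offset: the minimum is 0x66000000
 * (~1.6GB of user VA), devices with more than 512MB of memory and jumbo
 * pmaps get 0x80000000 (2GB), and MAX returns VM_MAX_ADDRESS.
 */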
9911 vm_map_offset_t pmap_max_32bit_offset(
9912 unsigned int option)
9913 {
9914 vm_map_offset_t max_offset_ret = 0;
9915
9916 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
9917 max_offset_ret = arm_pmap_max_offset_default;
9918 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
9919 max_offset_ret = 0x66000000;
9920 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
9921 max_offset_ret = VM_MAX_ADDRESS;
9922 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
9923 if (arm_pmap_max_offset_default) {
9924 max_offset_ret = arm_pmap_max_offset_default;
9925 } else if (max_mem > 0x20000000) {
9926 max_offset_ret = 0x80000000;
9927 } else {
9928 max_offset_ret = 0x66000000;
9929 }
9930 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
9931 max_offset_ret = 0x80000000;
9932 } else {
9933 panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
9934 }
9935
9936 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
9937 return max_offset_ret;
9938 }
9939
9940 #if CONFIG_DTRACE
9941 /*
9942 * Constrain DTrace copyin/copyout actions
9943 */
9944 extern kern_return_t dtrace_copyio_preflight(addr64_t);
9945 extern kern_return_t dtrace_copyio_postflight(addr64_t);
9946
9947 kern_return_t dtrace_copyio_preflight(
9948 __unused addr64_t va)
9949 {
9950 if (current_map() == kernel_map)
9951 return KERN_FAILURE;
9952 else
9953 return KERN_SUCCESS;
9954 }
9955
9956 kern_return_t dtrace_copyio_postflight(
9957 __unused addr64_t va)
9958 {
9959 return KERN_SUCCESS;
9960 }
9961 #endif /* CONFIG_DTRACE */
9962
9963
9964 void
9965 pmap_flush_context_init(__unused pmap_flush_context *pfc)
9966 {
9967 }
9968
9969
9970 void
9971 pmap_flush(
9972 __unused pmap_flush_context *cpus_to_flush)
9973 {
9974 /* not implemented yet */
9975 return;
9976 }
9977
9978
9979 static void __unused
9980 pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
9981 {
9982 }
9983
9984 static void __unused
9985 pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
9986 {
9987 }
9988
9989
9990
9991 #define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
9992
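/*
 * Count resident and compressed bytes for a range that must lie within a
 * single twig (one leaf page table's worth of VA); the exported
 * pmap_query_resident() below splits larger ranges into twig-sized calls.
 */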
9993 MARK_AS_PMAP_TEXT static mach_vm_size_t
9994 pmap_query_resident_internal(
9995 pmap_t pmap,
9996 vm_map_address_t start,
9997 vm_map_address_t end,
9998 mach_vm_size_t *compressed_bytes_p)
9999 {
10000 mach_vm_size_t resident_bytes = 0;
10001 mach_vm_size_t compressed_bytes = 0;
10002
10003 pt_entry_t *bpte, *epte;
10004 pt_entry_t *pte_p;
10005 tt_entry_t *tte_p;
10006
10007 if (pmap == NULL) {
10008 return PMAP_RESIDENT_INVALID;
10009 }
10010
10011 VALIDATE_PMAP(pmap);
10012
10013 /* Ensure that this request is valid, and addresses exactly one TTE. */
10014 if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES)))
10015 panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
10016
10017 if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES))))
10018 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
10019
10020 PMAP_LOCK(pmap);
10021 tte_p = pmap_tte(pmap, start);
10022 if (tte_p == (tt_entry_t *) NULL) {
10023 PMAP_UNLOCK(pmap);
10024 return PMAP_RESIDENT_INVALID;
10025 }
10026 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
10027
10028 #if (__ARM_VMSA__ == 7)
10029 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10030 bpte = &pte_p[ptenum(start)];
10031 epte = bpte + atop(end - start);
10032 #else
10033 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10034 bpte = &pte_p[tt3_index(pmap, start)];
10035 epte = bpte + ((end - start) >> ARM_TT_L3_SHIFT);
10036 #endif
10037
10038 for (; bpte < epte; bpte++) {
10039 if (ARM_PTE_IS_COMPRESSED(*bpte)) {
10040 compressed_bytes += ARM_PGBYTES;
10041 } else if (pa_valid(pte_to_pa(*bpte))) {
10042 resident_bytes += ARM_PGBYTES;
10043 }
10044 }
10045 }
10046 PMAP_UNLOCK(pmap);
10047
10048 if (compressed_bytes_p) {
10049 pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
10050 *compressed_bytes_p += compressed_bytes;
10051 pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
10052 }
10053
10054 return resident_bytes;
10055 }
10056
10057 mach_vm_size_t
10058 pmap_query_resident(
10059 pmap_t pmap,
10060 vm_map_address_t start,
10061 vm_map_address_t end,
10062 mach_vm_size_t *compressed_bytes_p)
10063 {
10064 mach_vm_size_t total_resident_bytes;
10065 mach_vm_size_t compressed_bytes;
10066 vm_map_address_t va;
10067
10068
10069 if (pmap == PMAP_NULL) {
10070 if (compressed_bytes_p) {
10071 *compressed_bytes_p = 0;
10072 }
10073 return 0;
10074 }
10075
10076 total_resident_bytes = 0;
10077 compressed_bytes = 0;
10078
10079 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
10080 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
10081 VM_KERNEL_ADDRHIDE(end));
10082
10083 va = start;
10084 while (va < end) {
10085 vm_map_address_t l;
10086 mach_vm_size_t resident_bytes;
10087
10088 l = ((va + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);
10089
10090 if (l > end)
10091 l = end;
10092 resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
10093 if (resident_bytes == PMAP_RESIDENT_INVALID)
10094 break;
10095
10096 total_resident_bytes += resident_bytes;
10097
10098 va = l;
10099 }
10100
10101 if (compressed_bytes_p) {
10102 *compressed_bytes_p = compressed_bytes;
10103 }
10104
10105 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
10106 total_resident_bytes);
10107
10108 return total_resident_bytes;
10109 }
10110
10111 #if MACH_ASSERT
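/*
 * Debug-only check run while tearing down a pmap: every pmap-related task
 * ledger should balance to zero by now. Imbalances are logged, recorded in
 * pmap_ledgers_drift, and turned into a panic when pmap_ledgers_panic is set.
 */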
10112 static void
10113 pmap_check_ledgers(
10114 pmap_t pmap)
10115 {
10116 ledger_amount_t bal;
10117 int pid;
10118 char *procname;
10119 boolean_t do_panic;
10120
10121 if (pmap->pmap_pid == 0) {
10122 /*
10123 * This pmap was never, or is no longer, fully associated
10124 * with a task (e.g. the old pmap after a fork()/exec() or
10125 * spawn()). Its "ledger" still points at a task that is
10126 * now using a different (and active) address space, so
10127 * we can't check that all the pmap ledgers are balanced here.
10128 *
10129 * If the "pid" is set, that means that we went through
10130 * pmap_set_process() in task_terminate_internal(), so
10131 * this task's ledger should not have been re-used and
10132 * all the pmap ledgers should be back to 0.
10133 */
10134 return;
10135 }
10136
10137 do_panic = FALSE;
10138 pid = pmap->pmap_pid;
10139 procname = pmap->pmap_procname;
10140
10141 pmap_ledgers_drift.num_pmaps_checked++;
10142
10143 #define LEDGER_CHECK_BALANCE(__LEDGER) \
10144 MACRO_BEGIN \
10145 int panic_on_negative = TRUE; \
10146 ledger_get_balance(pmap->ledger, \
10147 task_ledgers.__LEDGER, \
10148 &bal); \
10149 ledger_get_panic_on_negative(pmap->ledger, \
10150 task_ledgers.__LEDGER, \
10151 &panic_on_negative); \
10152 if (bal != 0) { \
10153 if (panic_on_negative || \
10154 (pmap_ledgers_panic && \
10155 pmap_ledgers_panic_leeway > 0 && \
10156 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
10157 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
10158 do_panic = TRUE; \
10159 } \
10160 printf("LEDGER BALANCE proc %d (%s) " \
10161 "\"%s\" = %lld\n", \
10162 pid, procname, #__LEDGER, bal); \
10163 if (bal > 0) { \
10164 pmap_ledgers_drift.__LEDGER##_over++; \
10165 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
10166 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
10167 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
10168 } \
10169 } else if (bal < 0) { \
10170 pmap_ledgers_drift.__LEDGER##_under++; \
10171 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
10172 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
10173 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
10174 } \
10175 } \
10176 } \
10177 MACRO_END
10178
10179 LEDGER_CHECK_BALANCE(phys_footprint);
10180 LEDGER_CHECK_BALANCE(internal);
10181 LEDGER_CHECK_BALANCE(internal_compressed);
10182 LEDGER_CHECK_BALANCE(iokit_mapped);
10183 LEDGER_CHECK_BALANCE(alternate_accounting);
10184 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
10185 LEDGER_CHECK_BALANCE(page_table);
10186 LEDGER_CHECK_BALANCE(purgeable_volatile);
10187 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
10188 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
10189 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
10190 LEDGER_CHECK_BALANCE(network_volatile);
10191 LEDGER_CHECK_BALANCE(network_nonvolatile);
10192 LEDGER_CHECK_BALANCE(network_volatile_compressed);
10193 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
10194
10195 if (do_panic) {
10196 if (pmap_ledgers_panic) {
10197 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
10198 pmap, pid, procname);
10199 } else {
10200 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
10201 pmap, pid, procname);
10202 }
10203 }
10204
10205 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
10206 #if 00
10207 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
10208 #endif
10209 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
10210 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
10211 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
10212 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
10213 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
10214 }
10215 #endif /* MACH_ASSERT */
10216
10217 void pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a) {
10218 }
10219
10220
10221 #if CONFIG_PGTRACE
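/* Elapsed-time profiling helpers (mach_absolute_time based) for the pgtrace entry points. */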
10222 #define PROF_START uint64_t t, nanot;\
10223 t = mach_absolute_time();
10224
10225 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
10226 kprintf("%s: took %llu ns\n", __func__, nanot);
10227
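/*
 * The pgtrace lock is taken with interrupts disabled; the previous
 * interrupt state is saved through *(p) and restored on unlock (or between
 * spin attempts).
 */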
10228 #define PMAP_PGTRACE_LOCK(p) \
10229 do { \
10230 *(p) = ml_set_interrupts_enabled(false); \
10231 if (simple_lock_try(&(pmap_pgtrace.lock))) break; \
10232 ml_set_interrupts_enabled(*(p)); \
10233 } while (true)
10234
10235 #define PMAP_PGTRACE_UNLOCK(p) \
10236 do { \
10237 simple_unlock(&(pmap_pgtrace.lock)); \
10238 ml_set_interrupts_enabled(*(p)); \
10239 } while (0)
10240
10241 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
10242 do { \
10243 *(pte_p) = (pte_entry); \
10244 FLUSH_PTE(pte_p); \
10245 } while (0)
10246
10247 #define PGTRACE_MAX_MAP 16 // maximum number of VAs that may alias the same traced PA
10248
10249 typedef enum {
10250 UNDEFINED,
10251 PA_UNDEFINED,
10252 VA_UNDEFINED,
10253 DEFINED
10254 } pmap_pgtrace_page_state_t;
10255
10256 typedef struct {
10257 queue_chain_t chain;
10258
10259 /*
10260 pa - physical address being traced
10261 maps - list of VA mappings cloned for this PA
10262 map_pool - pool of unused map entries
10263 map_waste - maps pulled out of service, recycled back to map_pool after cleanup
10264 state - tracking state of this page (see pmap_pgtrace_page_state_t)
10265 */
10266 pmap_paddr_t pa;
10267 queue_head_t maps;
10268 queue_head_t map_pool;
10269 queue_head_t map_waste;
10270 pmap_pgtrace_page_state_t state;
10271 } pmap_pgtrace_page_t;
10272
10273 static struct {
10274 /*
10275 pages - list of tracing page info
10276 */
10277 queue_head_t pages;
10278 decl_simple_lock_data(, lock);
10279 } pmap_pgtrace = {};
10280
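/*
 * Page-trace overview: pmap_pgtrace_add_page() records a page of interest
 * and clones any existing mappings of it. Cloning (pmap_pgtrace_enter_clone)
 * copies the PTEs for the page and its two neighbors into three reserved
 * kernel VAs (cva[0..2]) and then marks the original PTE invalid with
 * ARM_PTE_PGTRACE set, so every access faults. pmap_pgtrace_fault() decodes
 * the faulting instruction, runs it against the clone mapping, logs the
 * access if it falls in the traced range, and resumes at the next
 * instruction.
 */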
10281 static void pmap_pgtrace_init(void)
10282 {
10283 queue_init(&(pmap_pgtrace.pages));
10284 simple_lock_init(&(pmap_pgtrace.lock), 0);
10285
10286 boolean_t enabled;
10287
10288 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
10289 pgtrace_enabled = enabled;
10290 }
10291 }
10292
10293 // find a page with given pa - pmap_pgtrace should be locked
10294 inline static pmap_pgtrace_page_t *pmap_pgtrace_find_page(pmap_paddr_t pa)
10295 {
10296 queue_head_t *q = &(pmap_pgtrace.pages);
10297 pmap_pgtrace_page_t *p;
10298
10299 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
10300 if (p->state == UNDEFINED) {
10301 continue;
10302 }
10303 if (p->state == PA_UNDEFINED) {
10304 continue;
10305 }
10306 if (p->pa == pa) {
10307 return p;
10308 }
10309 }
10310
10311 return NULL;
10312 }
10313
10314 // enter clone of given pmap, va page and range - pmap should be locked
10315 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
10316 {
10317 bool ints;
10318 queue_head_t *q = &(pmap_pgtrace.pages);
10319 pmap_paddr_t pa_page;
10320 pt_entry_t *ptep, *cptep;
10321 pmap_pgtrace_page_t *p;
10322 bool found = false;
10323
10324 PMAP_ASSERT_LOCKED(pmap);
10325 assert(va_page == arm_trunc_page(va_page));
10326
10327 PMAP_PGTRACE_LOCK(&ints);
10328
10329 ptep = pmap_pte(pmap, va_page);
10330
10331 // target pte should exist
10332 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
10333 PMAP_PGTRACE_UNLOCK(&ints);
10334 return false;
10335 }
10336
10337 queue_head_t *mapq;
10338 queue_head_t *mappool;
10339 pmap_pgtrace_map_t *map = NULL;
10340
10341 pa_page = pte_to_pa(*ptep);
10342
10343 // find if we have a page info defined for this
10344 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
10345 mapq = &(p->maps);
10346 mappool = &(p->map_pool);
10347
10348 switch (p->state) {
10349 case PA_UNDEFINED:
10350 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10351 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
10352 p->pa = pa_page;
10353 map->range.start = start;
10354 map->range.end = end;
10355 found = true;
10356 break;
10357 }
10358 }
10359 break;
10360
10361 case VA_UNDEFINED:
10362 if (p->pa != pa_page) {
10363 break;
10364 }
10365 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10366 if (map->cloned == false) {
10367 map->pmap = pmap;
10368 map->ova = va_page;
10369 map->range.start = start;
10370 map->range.end = end;
10371 found = true;
10372 break;
10373 }
10374 }
10375 break;
10376
10377 case DEFINED:
10378 if (p->pa != pa_page) {
10379 break;
10380 }
10381 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10382 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
10383 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
10384 break;
10385 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
10386 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
10387 break;
10388 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
10389 // range should be already defined as well
10390 found = true;
10391 break;
10392 }
10393 }
10394 break;
10395
10396 default:
10397 panic("invalid state p->state=%x\n", p->state);
10398 }
10399
10400 if (found == true) {
10401 break;
10402 }
10403 }
10404
10405 // do not clone if no page info found
10406 if (found == false) {
10407 PMAP_PGTRACE_UNLOCK(&ints);
10408 return false;
10409 }
10410
10411 // copy pre, target and post ptes to clone ptes
10412 for (int i = 0; i < 3; i++) {
10413 ptep = pmap_pte(pmap, va_page + (i-1)*ARM_PGBYTES);
10414 cptep = pmap_pte(kernel_pmap, map->cva[i]);
10415 assert(cptep != NULL);
10416 if (ptep == NULL) {
10417 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
10418 } else {
10419 PGTRACE_WRITE_PTE(cptep, *ptep);
10420 }
10421 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
10422 }
10423
10424 // get ptes for original and clone
10425 ptep = pmap_pte(pmap, va_page);
10426 cptep = pmap_pte(kernel_pmap, map->cva[1]);
10427
10428 // invalidate original pte and mark it as a pgtrace page
10429 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
10430 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova+ARM_PGBYTES);
10431
10432 map->cloned = true;
10433 p->state = DEFINED;
10434
10435 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
10436
10437 PMAP_PGTRACE_UNLOCK(&ints);
10438
10439 return true;
10440 }
10441
10442 // Remove the trace bit and re-validate the PTE if applicable. The pmap must be locked.
10443 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
10444 {
10445 bool ints, found = false;
10446 pmap_pgtrace_page_t *p;
10447 pt_entry_t *ptep;
10448
10449 PMAP_PGTRACE_LOCK(&ints);
10450
10451 // we must have this page info
10452 p = pmap_pgtrace_find_page(pa);
10453 if (p == NULL) {
10454 goto unlock_exit;
10455 }
10456
10457 // find matching map
10458 queue_head_t *mapq = &(p->maps);
10459 queue_head_t *mappool = &(p->map_pool);
10460 pmap_pgtrace_map_t *map;
10461
10462 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10463 if (map->pmap == pmap && map->ova == va) {
10464 found = true;
10465 break;
10466 }
10467 }
10468
10469 if (!found) {
10470 goto unlock_exit;
10471 }
10472
10473 if (map->cloned == true) {
10474 // Restore the PTE to its original state
10475 ptep = pmap_pte(pmap, map->ova);
10476 assert(ptep);
10477 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
10478 PMAP_UPDATE_TLBS(pmap, va, va+ARM_PGBYTES);
10479
10480 // revert clone pages
10481 for (int i = 0; i < 3; i++) {
10482 ptep = pmap_pte(kernel_pmap, map->cva[i]);
10483 assert(ptep != NULL);
10484 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
10485 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
10486 }
10487 }
10488
10489 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
10490 map->pmap = NULL;
10491 map->ova = (vm_map_offset_t)NULL;
10492 map->cloned = false;
10493 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
10494
10495 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
10496
10497 unlock_exit:
10498 PMAP_PGTRACE_UNLOCK(&ints);
10499 }
10500
10501 // remove all clones of given pa - pmap must be locked
10502 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
10503 {
10504 bool ints;
10505 pmap_pgtrace_page_t *p;
10506 pt_entry_t *ptep;
10507
10508 PMAP_PGTRACE_LOCK(&ints);
10509
10510 // we must have this page info
10511 p = pmap_pgtrace_find_page(pa);
10512 if (p == NULL) {
10513 PMAP_PGTRACE_UNLOCK(&ints);
10514 return;
10515 }
10516
10517 queue_head_t *mapq = &(p->maps);
10518 queue_head_t *mappool = &(p->map_pool);
10519 queue_head_t *mapwaste = &(p->map_waste);
10520 pmap_pgtrace_map_t *map;
10521
10522 // move maps to waste
10523 while (!queue_empty(mapq)) {
10524 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
10525 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t*, chain);
10526 }
10527
10528 PMAP_PGTRACE_UNLOCK(&ints);
10529
10530 // sanitize maps in waste
10531 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
10532 if (map->cloned == true) {
10533 PMAP_LOCK(map->pmap);
10534
10535 // restore the original pte
10536 ptep = pmap_pte(map->pmap, map->ova);
10537 assert(ptep);
10538 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
10539 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova+ARM_PGBYTES);
10540
10541 // revert clone ptes
10542 for (int i = 0; i < 3; i++) {
10543 ptep = pmap_pte(kernel_pmap, map->cva[i]);
10544 assert(ptep != NULL);
10545 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
10546 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
10547 }
10548
10549 PMAP_UNLOCK(map->pmap);
10550 }
10551
10552 map->pmap = NULL;
10553 map->ova = (vm_map_offset_t)NULL;
10554 map->cloned = false;
10555 }
10556
10557 PMAP_PGTRACE_LOCK(&ints);
10558
10559 // recycle maps back to map_pool
10560 while (!queue_empty(mapwaste)) {
10561 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
10562 queue_enter_first(mappool, map, pmap_pgtrace_map_t*, chain);
10563 }
10564
10565 PMAP_PGTRACE_UNLOCK(&ints);
10566 }
10567
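/*
 * Derive the VA range to scan from the translation control register: the
 * kernel pmap is searched from the start of the TTBR1 region (computed from
 * T1SZ) up to VM_MAX_KERNEL_ADDRESS, while user pmaps are searched from 0 up
 * to the top of the T0SZ-sized region.
 */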
10568 inline static void pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
10569 {
10570 uint64_t tsz;
10571 vm_map_offset_t end;
10572
10573 if (pmap == kernel_pmap) {
10574 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
10575 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64-tsz)) << (64-tsz));
10576 *endp = VM_MAX_KERNEL_ADDRESS;
10577 } else {
10578 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
10579 if (tsz == 64) {
10580 end = 0;
10581 } else {
10582 end = ((uint64_t)1 << (64-tsz)) - 1;
10583 }
10584
10585 *startp = 0;
10586 *endp = end;
10587 }
10588
10589 assert(*endp > *startp);
10590
10591 return;
10592 }
10593
10594 // if pa is mapped in the given pmap, clone those mappings
10595 static uint64_t pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset) {
10596 uint64_t ret = 0;
10597 vm_map_offset_t min, max;
10598 vm_map_offset_t cur_page, end_page;
10599 pt_entry_t *ptep;
10600 tt_entry_t *ttep;
10601 tt_entry_t tte;
10602
10603 pmap_pgtrace_get_search_space(pmap, &min, &max);
10604
10605 cur_page = arm_trunc_page(min);
10606 end_page = arm_trunc_page(max);
10607 while (cur_page <= end_page) {
10608 vm_map_offset_t add = 0;
10609
10610 PMAP_LOCK(pmap);
10611
10612 // skip uninteresting space (kernel text and kext ranges)
10613 if (pmap == kernel_pmap &&
10614 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
10615 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
10616 add = ARM_PGBYTES;
10617 goto unlock_continue;
10618 }
10619
10620 #if __ARM64_TWO_LEVEL_PMAP__
10621 // check whether we can skip l2
10622 ttep = pmap_tt2e(pmap, cur_page);
10623 assert(ttep);
10624 tte = *ttep;
10625 #else
10626 // check whether we can skip l1
10627 ttep = pmap_tt1e(pmap, cur_page);
10628 assert(ttep);
10629 tte = *ttep;
10630 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
10631 add = ARM_TT_L1_SIZE;
10632 goto unlock_continue;
10633 }
10634
10635 // then check whether we can skip L2
10636 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, cur_page)];
10637 #endif
10638 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
10639 add = ARM_TT_L2_SIZE;
10640 goto unlock_continue;
10641 }
10642
10643 // finally, look up the L3 PTE
10644 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, cur_page)]);
10645 if (ptep == PT_ENTRY_NULL) {
10646 add = ARM_TT_L3_SIZE;
10647 goto unlock_continue;
10648 }
10649
10650 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
10651 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
10652 ret++;
10653 }
10654 }
10655
10656 add = ARM_PGBYTES;
10657
10658 unlock_continue:
10659 PMAP_UNLOCK(pmap);
10660
10661 // overflow
10662 if (cur_page + add < cur_page) {
10663 break;
10664 }
10665
10666 cur_page += add;
10667 }
10668
10669
10670 return ret;
10671 }
10672
10673 // search pv table and clone vas of given pa
10674 static uint64_t pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
10675 {
10676 uint64_t ret = 0;
10677 unsigned long pai;
10678 pv_entry_t **pvh;
10679 pt_entry_t *ptep;
10680 pmap_t pmap;
10681
10682 typedef struct {
10683 queue_chain_t chain;
10684 pmap_t pmap;
10685 vm_map_offset_t va;
10686 } pmap_va_t;
10687
10688 queue_head_t pmapvaq;
10689 pmap_va_t *pmapva;
10690
10691 queue_init(&pmapvaq);
10692
10693 pai = pa_index(pa);
10694 LOCK_PVH(pai);
10695 pvh = pai_to_pvh(pai);
10696
10697 // collect pmap/va pair from pvh
10698 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
10699 ptep = pvh_ptep(pvh);
10700 pmap = ptep_get_pmap(ptep);
10701
10702 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
10703 pmapva->pmap = pmap;
10704 pmapva->va = ptep_get_va(ptep);
10705
10706 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
10707
10708 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
10709 pv_entry_t *pvep;
10710
10711 pvep = pvh_list(pvh);
10712 while (pvep) {
10713 ptep = pve_get_ptep(pvep);
10714 pmap = ptep_get_pmap(ptep);
10715
10716 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
10717 pmapva->pmap = pmap;
10718 pmapva->va = ptep_get_va(ptep);
10719
10720 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
10721
10722 pvep = PVE_NEXT_PTR(pve_next(pvep));
10723 }
10724 }
10725
10726 UNLOCK_PVH(pai);
10727
10728 // clone them while making sure mapping still exists
10729 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
10730 PMAP_LOCK(pmapva->pmap);
10731 ptep = pmap_pte(pmapva->pmap, pmapva->va);
10732 if (pte_to_pa(*ptep) == pa) {
10733 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
10734 ret++;
10735 }
10736 }
10737 PMAP_UNLOCK(pmapva->pmap);
10738
10739 kfree(pmapva, sizeof(pmap_va_t));
10740 }
10741
10742 return ret;
10743 }
10744
10745 // allocate a page info
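// Each of the PGTRACE_MAX_MAP map slots reserves three consecutive kernel
// VAs (cva[0..2]) and saves their original kernel PTEs in cva_spte[] so the
// clone mappings can be reverted later.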
10746 static pmap_pgtrace_page_t *pmap_pgtrace_alloc_page(void)
10747 {
10748 pmap_pgtrace_page_t *p;
10749 queue_head_t *mapq;
10750 queue_head_t *mappool;
10751 queue_head_t *mapwaste;
10752 pmap_pgtrace_map_t *map;
10753
10754 p = kalloc(sizeof(pmap_pgtrace_page_t));
10755 assert(p);
10756
10757 p->state = UNDEFINED;
10758
10759 mapq = &(p->maps);
10760 mappool = &(p->map_pool);
10761 mapwaste = &(p->map_waste);
10762 queue_init(mapq);
10763 queue_init(mappool);
10764 queue_init(mapwaste);
10765
10766 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
10767 vm_map_offset_t newcva;
10768 pt_entry_t *cptep;
10769 kern_return_t kr;
10770 vm_map_entry_t entry;
10771
10772 // get a clone va
10773 vm_object_reference(kernel_object);
10774 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3*ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
10775 if (kr != KERN_SUCCESS) {
10776 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
10777 }
10778 VME_OBJECT_SET(entry, kernel_object);
10779 VME_OFFSET_SET(entry, newcva);
10780 vm_map_unlock(kernel_map);
10781
10782 // fill default clone page info and add to pool
10783 map = kalloc(sizeof(pmap_pgtrace_map_t));
10784 for (int j = 0; j < 3; j ++) {
10785 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
10786
10787 // pre-expand pmap while preemption enabled
10788 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
10789 if (kr != KERN_SUCCESS) {
10790 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
10791 }
10792
10793 cptep = pmap_pte(kernel_pmap, addr);
10794 assert(cptep != NULL);
10795
10796 map->cva[j] = addr;
10797 map->cva_spte[j] = *cptep;
10798 }
10799 map->range.start = map->range.end = 0;
10800 map->cloned = false;
10801 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
10802 }
10803
10804 return p;
10805 }
10806
10807 // free a page info
10808 static void pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
10809 {
10810 queue_head_t *mapq;
10811 queue_head_t *mappool;
10812 queue_head_t *mapwaste;
10813 pmap_pgtrace_map_t *map;
10814
10815 assert(p);
10816
10817 mapq = &(p->maps);
10818 mappool = &(p->map_pool);
10819 mapwaste = &(p->map_waste);
10820
10821 while (!queue_empty(mapq)) {
10822 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
10823 kfree(map, sizeof(pmap_pgtrace_map_t));
10824 }
10825
10826 while (!queue_empty(mappool)) {
10827 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
10828 kfree(map, sizeof(pmap_pgtrace_map_t));
10829 }
10830
10831 while (!queue_empty(mapwaste)) {
10832 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
10833 kfree(map, sizeof(pmap_pgtrace_map_t));
10834 }
10835
10836 kfree(p, sizeof(pmap_pgtrace_page_t));
10837 }
10838
10839 // construct page infos with the given address range
10840 int pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
10841 {
10842 int ret = 0;
10843 pt_entry_t *ptep;
10844 queue_head_t *q = &(pmap_pgtrace.pages);
10845 bool ints;
10846 vm_map_offset_t cur_page, end_page;
10847
10848 if (start > end) {
10849 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
10850 return -1;
10851 }
10852
10853 PROF_START
10854
10855 // add each page in given range
10856 cur_page = arm_trunc_page(start);
10857 end_page = arm_trunc_page(end);
10858 while (cur_page <= end_page) {
10859 pmap_paddr_t pa_page = 0;
10860 uint64_t num_cloned = 0;
10861 pmap_pgtrace_page_t *p = NULL, *newp;
10862 bool free_newp = true;
10863 pmap_pgtrace_page_state_t state;
10864
10865 // do all allocations outside of spinlocks
10866 newp = pmap_pgtrace_alloc_page();
10867
10868 // lock ordering: pmap, then kernel_pmap, then the pgtrace lock
10869 if (pmap != NULL) {
10870 PMAP_LOCK(pmap);
10871 }
10872 if (pmap != kernel_pmap) {
10873 PMAP_LOCK(kernel_pmap);
10874 }
10875
10876 // addresses are physical if pmap is null
10877 if (pmap == NULL) {
10878 ptep = NULL;
10879 pa_page = cur_page;
10880 state = VA_UNDEFINED;
10881 } else {
10882 ptep = pmap_pte(pmap, cur_page);
10883 if (ptep != NULL) {
10884 pa_page = pte_to_pa(*ptep);
10885 state = DEFINED;
10886 } else {
10887 state = PA_UNDEFINED;
10888 }
10889 }
10890
10891 // search if we have a page info already
10892 PMAP_PGTRACE_LOCK(&ints);
10893 if (state != PA_UNDEFINED) {
10894 p = pmap_pgtrace_find_page(pa_page);
10895 }
10896
10897 // add pre-allocated page info if nothing found
10898 if (p == NULL) {
10899 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
10900 p = newp;
10901 free_newp = false;
10902 }
10903
10904 // now p points to the page info we want
10905 p->state = state;
10906
10907 queue_head_t *mapq = &(p->maps);
10908 queue_head_t *mappool = &(p->map_pool);
10909 pmap_pgtrace_map_t *map;
10910 vm_map_offset_t start_offset, end_offset;
10911
10912 // calculate trace offsets in the page
10913 if (cur_page > start) {
10914 start_offset = 0;
10915 } else {
10916 start_offset = start-cur_page;
10917 }
10918 if (cur_page == end_page) {
10919 end_offset = end-end_page;
10920 } else {
10921 end_offset = ARM_PGBYTES-1;
10922 }
10923
10924 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
10925
10926 // fill map info
10927 assert(!queue_empty(mappool));
10928 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
10929 if (p->state == PA_UNDEFINED) {
10930 map->pmap = pmap;
10931 map->ova = cur_page;
10932 map->range.start = start_offset;
10933 map->range.end = end_offset;
10934 } else if (p->state == VA_UNDEFINED) {
10935 p->pa = pa_page;
10936 map->range.start = start_offset;
10937 map->range.end = end_offset;
10938 } else if (p->state == DEFINED) {
10939 p->pa = pa_page;
10940 map->pmap = pmap;
10941 map->ova = cur_page;
10942 map->range.start = start_offset;
10943 map->range.end = end_offset;
10944 } else {
10945 panic("invalid p->state=%d\n", p->state);
10946 }
10947
10948 // not cloned yet
10949 map->cloned = false;
10950 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
10951
10952 // release the locks in reverse order
10953 PMAP_PGTRACE_UNLOCK(&ints);
10954 if (pmap != kernel_pmap) {
10955 PMAP_UNLOCK(kernel_pmap);
10956 }
10957 if (pmap != NULL) {
10958 PMAP_UNLOCK(pmap);
10959 }
10960
10961 // now clone it
10962 if (pa_valid(pa_page)) {
10963 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
10964 }
10965 if (pmap == NULL) {
10966 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
10967 } else {
10968 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
10969 }
10970
10971 // free pre-allocations if we didn't add it to the q
10972 if (free_newp) {
10973 pmap_pgtrace_free_page(newp);
10974 }
10975
10976 if (num_cloned == 0) {
10977 kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
10978 }
10979
10980 ret += num_cloned;
10981
10982 // overflow
10983 if (cur_page + ARM_PGBYTES < cur_page) {
10984 break;
10985 } else {
10986 cur_page += ARM_PGBYTES;
10987 }
10988 }
10989
10990 PROF_END
10991
10992 return ret;
10993 }
10994
10995 // delete page infos for given address range
10996 int pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
10997 {
10998 int ret = 0;
10999 bool ints;
11000 queue_head_t *q = &(pmap_pgtrace.pages);
11001 pmap_pgtrace_page_t *p;
11002 vm_map_offset_t cur_page, end_page;
11003
11004 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
11005
11006 PROF_START
11007
11008 pt_entry_t *ptep;
11009 pmap_paddr_t pa_page;
11010
11011 // remove page info from start to end
11012 cur_page = arm_trunc_page(start);
11013 end_page = arm_trunc_page(end);
11014 while (cur_page <= end_page) {
11015 p = NULL;
11016
11017 if (pmap == NULL) {
11018 pa_page = cur_page;
11019 } else {
11020 PMAP_LOCK(pmap);
11021 ptep = pmap_pte(pmap, cur_page);
11022 if (ptep == NULL) {
11023 PMAP_UNLOCK(pmap);
11024 goto cont;
11025 }
11026 pa_page = pte_to_pa(*ptep);
11027 PMAP_UNLOCK(pmap);
11028 }
11029
11030 // remove all clones and validate
11031 pmap_pgtrace_remove_all_clone(pa_page);
11032
11033 // find page info and delete
11034 PMAP_PGTRACE_LOCK(&ints);
11035 p = pmap_pgtrace_find_page(pa_page);
11036 if (p != NULL) {
11037 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
11038 ret++;
11039 }
11040 PMAP_PGTRACE_UNLOCK(&ints);
11041
11042 // free outside of locks
11043 if (p != NULL) {
11044 pmap_pgtrace_free_page(p);
11045 }
11046
11047 cont:
11048 // overflow
11049 if (cur_page + ARM_PGBYTES < cur_page) {
11050 break;
11051 } else {
11052 cur_page += ARM_PGBYTES;
11053 }
11054 }
11055
11056 PROF_END
11057
11058 return ret;
11059 }
11060
11061 kern_return_t pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
11062 {
11063 pt_entry_t *ptep;
11064 pgtrace_run_result_t res;
11065 pmap_pgtrace_page_t *p;
11066 bool ints, found = false;
11067 pmap_paddr_t pa;
11068
11069 // Quick check if we are interested
11070 ptep = pmap_pte(pmap, va);
11071 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
11072 return KERN_FAILURE;
11073 }
11074
11075 PMAP_PGTRACE_LOCK(&ints);
11076
11077 // Check again since access is serialized
11078 ptep = pmap_pte(pmap, va);
11079 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
11080 PMAP_PGTRACE_UNLOCK(&ints);
11081 return KERN_FAILURE;
11082
11083 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
11084 // This CPU's TLB apparently has not picked up the update yet; flush it and let the access retry
11085 kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
11086 PMAP_UPDATE_TLBS(pmap, va, va+ARM_PGBYTES);
11087
11088 PMAP_PGTRACE_UNLOCK(&ints);
11089 return KERN_SUCCESS;
11090 }
11091
11092 // Find if this pa is what we are tracing
11093 pa = pte_to_pa(*ptep);
11094
11095 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
11096 if (p == NULL) {
11097 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
11098 }
11099
11100 // find if pmap and va are also matching
11101 queue_head_t *mapq = &(p->maps);
11102 queue_head_t *mapwaste = &(p->map_waste);
11103 pmap_pgtrace_map_t *map;
11104
11105 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
11106 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
11107 found = true;
11108 break;
11109 }
11110 }
11111
11112 // if not found, search map waste as they are still valid
11113 if (!found) {
11114 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
11115 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
11116 found = true;
11117 break;
11118 }
11119 }
11120 }
11121
11122 if (!found) {
11123 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
11124 }
11125
11126 // Decode and run it on the clone map
11127 bzero(&res, sizeof(res));
11128 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
11129 va, map->cva, // fault va and clone page vas
11130 ss, &res);
11131
11132 // write a log if in range
11133 vm_map_offset_t offset = va - map->ova;
11134 if (map->range.start <= offset && offset <= map->range.end) {
11135 pgtrace_write_log(res);
11136 }
11137
11138 PMAP_PGTRACE_UNLOCK(&ints);
11139
11140 // Return to next instruction
11141 set_saved_state_pc(ss, get_saved_state_pc(ss) + sizeof(uint32_t));
11142
11143 return KERN_SUCCESS;
11144 }
11145 #endif
11146
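/*
 * Execute-only protection can only be enforced for user pmaps under the
 * ARMv8 VMSA (__ARM_VMSA__ > 7); the kernel pmap and ARMv7 report FALSE.
 */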
11147 boolean_t
11148 pmap_enforces_execute_only(
11149 #if (__ARM_VMSA__ == 7)
11150 __unused
11151 #endif
11152 pmap_t pmap)
11153 {
11154 #if (__ARM_VMSA__ > 7)
11155 return (pmap != kernel_pmap);
11156 #else
11157 return FALSE;
11158 #endif
11159 }
11160
11161 MARK_AS_PMAP_TEXT void
11162 pmap_set_jit_entitled_internal(
11163 __unused pmap_t pmap)
11164 {
11165 return;
11166 }
11167
11168 void
11169 pmap_set_jit_entitled(
11170 pmap_t pmap)
11171 {
11172 pmap_set_jit_entitled_internal(pmap);
11173 }
11174
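/*
 * Report the disposition of the page mapped at va as a combination of
 * PMAP_QUERY_PAGE_* flags: present, compressed (with or without alternate
 * accounting), alternate-accounted, reusable or internal.
 */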
11175 MARK_AS_PMAP_TEXT static kern_return_t
11176 pmap_query_page_info_internal(
11177 pmap_t pmap,
11178 vm_map_offset_t va,
11179 int *disp_p)
11180 {
11181 pmap_paddr_t pa;
11182 int disp;
11183 int pai;
11184 pt_entry_t *pte;
11185 pv_entry_t **pv_h, *pve_p;
11186
11187 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
11188 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11189 *disp_p = 0;
11190 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11191 return KERN_INVALID_ARGUMENT;
11192 }
11193
11194 disp = 0;
11195
11196 VALIDATE_PMAP(pmap);
11197 PMAP_LOCK(pmap);
11198
11199 pte = pmap_pte(pmap, va);
11200 if (pte == PT_ENTRY_NULL) {
11201 goto done;
11202 }
11203
11204 pa = pte_to_pa(*pte);
11205 if (pa == 0) {
11206 if (ARM_PTE_IS_COMPRESSED(*pte)) {
11207 disp |= PMAP_QUERY_PAGE_COMPRESSED;
11208 if (*pte & ARM_PTE_COMPRESSED_ALT) {
11209 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
11210 }
11211 }
11212 } else {
11213 disp |= PMAP_QUERY_PAGE_PRESENT;
11214 pai = (int) pa_index(pa);
11215 if (!pa_valid(pa)) {
11216 goto done;
11217 }
11218 LOCK_PVH(pai);
11219 pv_h = pai_to_pvh(pai);
11220 pve_p = PV_ENTRY_NULL;
11221 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
11222 pve_p = pvh_list(pv_h);
11223 while (pve_p != PV_ENTRY_NULL &&
11224 pve_get_ptep(pve_p) != pte) {
11225 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
11226 }
11227 }
11228 if (IS_ALTACCT_PAGE(pai, pve_p)) {
11229 disp |= PMAP_QUERY_PAGE_ALTACCT;
11230 } else if (IS_REUSABLE_PAGE(pai)) {
11231 disp |= PMAP_QUERY_PAGE_REUSABLE;
11232 } else if (IS_INTERNAL_PAGE(pai)) {
11233 disp |= PMAP_QUERY_PAGE_INTERNAL;
11234 }
11235 UNLOCK_PVH(pai);
11236 }
11237
11238 done:
11239 PMAP_UNLOCK(pmap);
11240 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11241 *disp_p = disp;
11242 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11243 return KERN_SUCCESS;
11244 }
11245
11246 kern_return_t
11247 pmap_query_page_info(
11248 pmap_t pmap,
11249 vm_map_offset_t va,
11250 int *disp_p)
11251 {
11252 return pmap_query_page_info_internal(pmap, va, disp_p);
11253 }
11254
11255 MARK_AS_PMAP_TEXT kern_return_t
11256 pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
11257 {
11258
11259 return KERN_SUCCESS;
11260 }
11261
11262 kern_return_t
11263 pmap_return(boolean_t do_panic, boolean_t do_recurse)
11264 {
11265 return pmap_return_internal(do_panic, do_recurse);
11266 }
11267
11268
11269
11270 MARK_AS_PMAP_TEXT static void
11271 pmap_footprint_suspend_internal(
11272 vm_map_t map,
11273 boolean_t suspend)
11274 {
11275 #if DEVELOPMENT || DEBUG
11276 if (suspend) {
11277 current_thread()->pmap_footprint_suspended = TRUE;
11278 map->pmap->footprint_was_suspended = TRUE;
11279 } else {
11280 current_thread()->pmap_footprint_suspended = FALSE;
11281 }
11282 #else /* DEVELOPMENT || DEBUG */
11283 (void) map;
11284 (void) suspend;
11285 #endif /* DEVELOPMENT || DEBUG */
11286 }
11287
11288 void
11289 pmap_footprint_suspend(
11290 vm_map_t map,
11291 boolean_t suspend)
11292 {
11293 pmap_footprint_suspend_internal(map, suspend);
11294 }
11295
11296 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
11297
11298 struct page_table_level_info {
11299 uint64_t size;
11300 uint64_t offmask;
11301 uint64_t shift;
11302 uint64_t index_mask;
11303 uint64_t valid_mask;
11304 uint64_t type_mask;
11305 uint64_t type_block;
11306 };
11307
11308 struct page_table_dump_header {
11309 uint64_t pa;
11310 uint64_t num_entries;
11311 uint64_t start_va;
11312 uint64_t end_va;
11313 };
11314
11315 struct page_table_level_info page_table_levels[] =
11316 { { ARM_TT_L0_SIZE, ARM_TT_L0_OFFMASK, ARM_TT_L0_SHIFT, ARM_TT_L0_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
11317 { ARM_TT_L1_SIZE, ARM_TT_L1_OFFMASK, ARM_TT_L1_SHIFT, ARM_TT_L1_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
11318 { ARM_TT_L2_SIZE, ARM_TT_L2_OFFMASK, ARM_TT_L2_SHIFT, ARM_TT_L2_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
11319 { ARM_TT_L3_SIZE, ARM_TT_L3_OFFMASK, ARM_TT_L3_SHIFT, ARM_TT_L3_INDEX_MASK, ARM_PTE_TYPE_VALID, ARM_PTE_TYPE_MASK, ARM_TTE_TYPE_L3BLOCK } };
11320
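/*
 * Copy a page table hierarchy into a flat buffer for the kernel debugger:
 * each visited table is emitted as a page_table_dump_header followed by its
 * raw entries, recursing into valid table (non-block) entries. Returns the
 * number of bytes used, or 0 if the buffer is too small.
 */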
11321 static size_t
11322 pmap_dump_page_tables_recurse(const tt_entry_t *ttp,
11323 unsigned int cur_level,
11324 uint64_t start_va,
11325 void *bufp,
11326 void *buf_end)
11327 {
11328 size_t bytes_used = 0;
11329 uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
11330 uint64_t size = page_table_levels[cur_level].size;
11331 uint64_t valid_mask = page_table_levels[cur_level].valid_mask;
11332 uint64_t type_mask = page_table_levels[cur_level].type_mask;
11333 uint64_t type_block = page_table_levels[cur_level].type_block;
11334
11335 if (cur_level == arm64_root_pgtable_level)
11336 num_entries = arm64_root_pgtable_num_ttes;
11337
11338 uint64_t tt_size = num_entries * sizeof(tt_entry_t);
11339 const tt_entry_t *tt_end = &ttp[num_entries];
11340
11341 if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
11342 return 0;
11343 }
11344
11345 struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
11346 header->pa = ml_static_vtop((vm_offset_t)ttp);
11347 header->num_entries = num_entries;
11348 header->start_va = start_va;
11349 header->end_va = start_va + (num_entries * size);
11350
11351 bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
11352 bytes_used += (sizeof(*header) + tt_size);
11353 uint64_t current_va = start_va;
11354
11355 for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
11356 tt_entry_t tte = *ttep;
11357
11358 if (!(tte & valid_mask)) {
11359 continue;
11360 }
11361
11362 if ((tte & type_mask) == type_block) {
11363 continue;
11364 } else {
11365 if (cur_level >= PMAP_TT_MAX_LEVEL) {
11366 panic("%s: corrupt entry %#llx at %p, "
11367 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
11368 __FUNCTION__, tte, ttep,
11369 ttp, cur_level, bufp, buf_end);
11370 }
11371
11372 const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
11373
11374 size_t recurse_result = pmap_dump_page_tables_recurse(next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);
11375
11376 if (recurse_result == 0) {
11377 return 0;
11378 }
11379
11380 bytes_used += recurse_result;
11381 }
11382 }
11383
11384 return bytes_used;
11385 }
11386
11387 size_t
11388 pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
11389 {
11390 if (not_in_kdp)
11391 panic("pmap_dump_page_tables must only be called from kernel debugger context");
11392 return pmap_dump_page_tables_recurse(pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
11393 }
11394
11395 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
11396
11397 size_t
11398 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
11399 {
11400 return (size_t)-1;
11401 }
11402
11403 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
11404