[apple/xnu.git] / osfmk / arm / pmap.c (blob a62fd171e6ebd18a3292c61350c938458b6df509)
1 /*
2 * Copyright (c) 2011-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38
39 #include <mach/boolean.h>
40 #include <kern/thread.h>
41 #include <kern/sched.h>
42 #include <kern/zalloc.h>
43 #include <kern/kalloc.h>
44 #include <kern/ledger.h>
45 #include <kern/misc_protos.h>
46 #include <kern/spl.h>
47 #include <kern/xpr.h>
48
49 #include <vm/pmap.h>
50 #include <vm/vm_map.h>
51 #include <vm/vm_kern.h>
52 #include <vm/vm_protos.h>
53 #include <vm/vm_object.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/cpm.h>
57
58 #include <libkern/section_keywords.h>
59
60 #include <machine/atomic.h>
61 #include <machine/thread.h>
62 #include <machine/lowglobals.h>
63
64 #include <arm/caches_internal.h>
65 #include <arm/cpu_data.h>
66 #include <arm/cpu_data_internal.h>
67 #include <arm/cpu_capabilities.h>
68 #include <arm/cpu_number.h>
69 #include <arm/machine_cpu.h>
70 #include <arm/misc_protos.h>
71 #include <arm/trap.h>
72
73 #include <libkern/section_keywords.h>
74
75 #if (__ARM_VMSA__ > 7)
76 #include <arm64/proc_reg.h>
77 #include <pexpert/arm64/boot.h>
78 #if CONFIG_PGTRACE
79 #include <stdint.h>
80 #include <arm64/pgtrace.h>
81 #if CONFIG_PGTRACE_NONKEXT
82 #include <arm64/pgtrace_decoder.h>
83 #endif // CONFIG_PGTRACE_NONKEXT
84 #endif
85 #endif
86
87 #include <pexpert/device_tree.h>
88
89 #include <san/kasan.h>
90
91 #if MACH_ASSERT
92 int pmap_stats_assert = 1;
93 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
94 MACRO_BEGIN \
95 if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
96 assertf(cond, fmt, ##__VA_ARGS__); \
97 MACRO_END
98 #else /* MACH_ASSERT */
99 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
100 #endif /* MACH_ASSERT */
101
102 #if DEVELOPMENT || DEBUG
103 #define PMAP_FOOTPRINT_SUSPENDED(pmap) ((pmap)->footprint_suspended)
104 #else /* DEVELOPMENT || DEBUG */
105 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
106 #endif /* DEVELOPMENT || DEBUG */
107
108
109
110 #if DEVELOPMENT || DEBUG
111 int panic_on_unsigned_execute = 0;
112 #endif /* DEVELOPMENT || DEBUG */
113
114
115 /* Virtual memory region for early allocation */
116 #if (__ARM_VMSA__ == 7)
117 #define VREGION1_START (VM_HIGH_KERNEL_WINDOW & ~ARM_TT_L1_PT_OFFMASK)
118 #else
119 #define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
120 #define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
121 #endif
122 #define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
123
124 extern unsigned int not_in_kdp;
125
126 extern vm_offset_t first_avail;
127
128 extern pmap_paddr_t avail_start;
129 extern pmap_paddr_t avail_end;
130
131 extern vm_offset_t virtual_space_start; /* Next available kernel VA */
132 extern vm_offset_t virtual_space_end; /* End of kernel address space */
133
134 extern int hard_maxproc;
135
136 #if (__ARM_VMSA__ > 7)
137 /* The number of address bits one TTBR can cover. */
138 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
139
140 /*
141 * The bounds on our TTBRs. These are for sanity checking that
142 * an address is accessible by a TTBR before we attempt to map it.
143 */
144 #define ARM64_TTBR0_MIN_ADDR (0ULL)
145 #define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
146 #define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
147 #define ARM64_TTBR1_MAX_ADDR (~0ULL)
148
149 /* The level of the root of a page table. */
150 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
151
152 /* The number of entries in the root TT of a page table. */
153 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
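/*
 * Worked example (illustrative values only, not asserted for this
 * configuration): with 16KB pages (ARM_PGSHIFT == 14) and 8-byte TTEs
 * (TTE_SHIFT == 3, i.e. 11 index bits per level), a T0SZ_BOOT of 25
 * would give PGTABLE_ADDR_BITS == 39, hence
 *	arm64_root_pgtable_level    == 3 - ((39 - 1 - 14) / 11) == 1
 *	arm64_root_pgtable_num_ttes == 2 << ((39 - 1 - 14) % 11) == 8
 * i.e. translation would start at L1 with an 8-entry root table.
 */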
154 #else
155 const uint64_t arm64_root_pgtable_level = 0;
156 const uint64_t arm64_root_pgtable_num_ttes = 0;
157 #endif
158
159 struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
160 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
161
162 struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* store pt pages */
163 vm_object_t pmap_object = &pmap_object_store;
164
165 static struct zone *pmap_zone; /* zone of pmap structures */
166
167 decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA)
168 unsigned int pmap_stamp MARK_AS_PMAP_DATA;
169 queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
170
171 queue_head_t tt_pmap_list MARK_AS_PMAP_DATA;
172 unsigned int tt_pmap_count MARK_AS_PMAP_DATA;
173 unsigned int tt_pmap_max MARK_AS_PMAP_DATA;
174
175 decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA)
176 queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
177
178 decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA)
179
180 typedef struct page_free_entry {
181 struct page_free_entry *next;
182 } page_free_entry_t;
183
184 #define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
185
186 page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
187 unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
188 unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
189
190
191 typedef struct tt_free_entry {
192 struct tt_free_entry *next;
193 } tt_free_entry_t;
194
195 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
196
197 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
198 unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
199 unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
200 #define FREE_PAGE_SIZE_TT_MAX 4
201 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
202 unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
203 unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
204 #define FREE_TWO_PAGE_SIZE_TT_MAX 4
205 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
206 unsigned int free_tt_count MARK_AS_PMAP_DATA;
207 unsigned int free_tt_max MARK_AS_PMAP_DATA;
208
209 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
210
211 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
212 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
213 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
214
215 unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
216 unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
217 unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
218 unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
219 unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
220 unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
221 unsigned int inuse_pmap_pages_count = 0; /* debugging */
222
223 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
224 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
225
226 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
227 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
228
229 #if DEVELOPMENT || DEBUG
230 int nx_enabled = 1; /* enable no-execute protection */
231 int allow_data_exec = 0; /* No apps may execute data */
232 int allow_stack_exec = 0; /* No apps may execute from the stack */
233 #else /* DEVELOPMENT || DEBUG */
234 const int nx_enabled = 1; /* enable no-execute protection */
235 const int allow_data_exec = 0; /* No apps may execute data */
236 const int allow_stack_exec = 0; /* No apps may execute from the stack */
237 #endif /* DEVELOPMENT || DEBUG */
238
239 /*
240 * pv_entry_t - structure to track the active mappings for a given page
241 */
242 typedef struct pv_entry {
243 struct pv_entry *pve_next; /* next alias */
244 pt_entry_t *pve_ptep; /* page table entry */
245 #if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
246 /* For the newer ARMv7k ABI, 64-bit types are 64-bit aligned but
247 * pointers are 32-bit. Since pt_desc is 64-bit aligned and we often
248 * cast from pv_entry to pt_desc, keep pv_entry 8-byte aligned as
249 * well.
250 */
251 } __attribute__ ((aligned(8))) pv_entry_t;
252 #else
253 } pv_entry_t;
254 #endif
255
256 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
257
258 /*
259 * PMAP LEDGERS:
260 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
261 * as a marker for pages mapped through an "alternate accounting" mapping.
262 * These macros set, clear and test for this marker and extract the actual
263 * value of the "pve_next" pointer.
264 */
265 #define PVE_NEXT_ALTACCT ((uintptr_t) 0x1)
266 #define PVE_NEXT_SET_ALTACCT(pve_next_p) \
267 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
268 PVE_NEXT_ALTACCT)
269 #define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
270 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
271 ~PVE_NEXT_ALTACCT)
272 #define PVE_NEXT_IS_ALTACCT(pve_next) \
273 ((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
274 #define PVE_NEXT_PTR(pve_next) \
275 ((struct pv_entry *)(((uintptr_t) (pve_next)) & \
276 ~PVE_NEXT_ALTACCT))
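/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * source): pv_entry_t is pointer-aligned, so the low bit of "pve_next"
 * is always zero and can carry the "alternate accounting" marker; the
 * real successor is recovered by masking the marker back off.
 */
static inline pv_entry_t *
pve_altacct_example(pv_entry_t *pve, boolean_t altacct)
{
	if (altacct) {
		PVE_NEXT_SET_ALTACCT(&pve->pve_next);	/* tag the link */
	} else {
		PVE_NEXT_CLR_ALTACCT(&pve->pve_next);	/* clear the tag */
	}
	assert(PVE_NEXT_IS_ALTACCT(pve->pve_next) == altacct);
	return PVE_NEXT_PTR(pve->pve_next);		/* untagged pointer */
}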
277 #if MACH_ASSERT
278 static void pmap_check_ledgers(pmap_t pmap);
279 #else
280 static inline void pmap_check_ledgers(__unused pmap_t pmap) {}
281 #endif /* MACH_ASSERT */
282
283 SECURITY_READ_ONLY_LATE(pv_entry_t **) pv_head_table; /* array of pv entry pointers */
284
285 pv_entry_t *pv_free_list MARK_AS_PMAP_DATA;
286 pv_entry_t *pv_kern_free_list MARK_AS_PMAP_DATA;
287 decl_simple_lock_data(,pv_free_list_lock MARK_AS_PMAP_DATA)
288 decl_simple_lock_data(,pv_kern_free_list_lock MARK_AS_PMAP_DATA)
289
290 decl_simple_lock_data(,phys_backup_lock)
291
292 /*
293 * pt_desc - structure to keep info on page assigned to page tables
294 */
295 #if (__ARM_VMSA__ == 7)
296 #define PT_INDEX_MAX 1
297 #else
298 #if (ARM_PGSHIFT == 14)
299 #define PT_INDEX_MAX 1
300 #else
301 #define PT_INDEX_MAX 4
302 #endif
303 #endif
304
305 #define PT_DESC_REFCOUNT 0x4000U
306
307 typedef struct pt_desc {
308 queue_chain_t pt_page;
309 struct {
310 unsigned short refcnt;
311 unsigned short wiredcnt;
312 } pt_cnt[PT_INDEX_MAX];
313 struct pmap *pmap;
314 struct {
315 vm_offset_t va;
316 } pt_map[PT_INDEX_MAX];
317 } pt_desc_t;
318
319
320 #define PTD_ENTRY_NULL ((pt_desc_t *) 0)
321
322 SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
323
324 pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
325 SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
326 unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
327 decl_simple_lock_data(,ptd_free_list_lock MARK_AS_PMAP_DATA)
328
329 /*
330 * physical page attribute
331 */
332 typedef u_int16_t pp_attr_t;
333
334 #define PP_ATTR_WIMG_MASK 0x003F
335 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
336
337 #define PP_ATTR_REFERENCED 0x0040
338 #define PP_ATTR_MODIFIED 0x0080
339
340 #define PP_ATTR_INTERNAL 0x0100
341 #define PP_ATTR_REUSABLE 0x0200
342 #define PP_ATTR_ALTACCT 0x0400
343 #define PP_ATTR_NOENCRYPT 0x0800
344
345 #define PP_ATTR_REFFAULT 0x1000
346 #define PP_ATTR_MODFAULT 0x2000
347
348
349 SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
350
351
352 typedef uint8_t io_attr_t;
353
354 #define IO_ATTR_WIMG_MASK 0x3F
355 #define IO_ATTR_WIMG(x) ((x) & IO_ATTR_WIMG_MASK)
356
357 SECURITY_READ_ONLY_LATE(io_attr_t*) io_attr_table;
358
359 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
360 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
361
362 SECURITY_READ_ONLY_LATE(pmap_paddr_t) io_rgn_start = 0;
363 SECURITY_READ_ONLY_LATE(pmap_paddr_t) io_rgn_end = 0;
364 SECURITY_READ_ONLY_LATE(uint32_t) io_rgn_granule = 0;
365
366 SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
367
368 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
369 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;
370
371 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
372 #if defined(__arm64__)
373 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
374 #endif
375
376 /* free address spaces (1 means free) */
377 static uint32_t asid_bitmap[MAX_ASID / (sizeof(uint32_t) * NBBY)] MARK_AS_PMAP_DATA;
378
379 #if (__ARM_VMSA__ > 7)
380 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
381 #endif
382
383
384 #define pa_index(pa) \
385 (atop((pa) - vm_first_phys))
386
387 #define pai_to_pvh(pai) \
388 (&pv_head_table[pai])
389
390 #define pa_valid(x) \
391 ((x) >= vm_first_phys && (x) < vm_last_phys)
392
393 /* PTE Define Macros */
394
395 #define pte_is_wired(pte) \
396 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
397
398 #define pte_set_wired(ptep, wired) \
399 do { \
400 SInt16 *ptd_wiredcnt_ptr; \
401 ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->pt_cnt[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
402 if (wired) { \
403 *ptep |= ARM_PTE_WIRED; \
404 OSAddAtomic16(1, ptd_wiredcnt_ptr); \
405 } else { \
406 *ptep &= ~ARM_PTE_WIRED; \
407 OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
408 } \
409 } while(0)
410
411 #define pte_is_ffr(pte) \
412 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
413
414 #define pte_set_ffr(pte, ffr) \
415 do { \
416 if (ffr) { \
417 pte |= ARM_PTE_WRITEABLE; \
418 } else { \
419 pte &= ~ARM_PTE_WRITEABLE; \
420 } \
421 } while(0)
422
423 /* PVE Define Macros */
424
425 #define pve_next(pve) \
426 ((pve)->pve_next)
427
428 #define pve_link_field(pve) \
429 (&pve_next(pve))
430
431 #define pve_link(pp, e) \
432 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
433
434 #define pve_unlink(pp, e) \
435 (pve_next(pp) = pve_next(e))
436
437 /* bits held in the ptep pointer field */
438
439 #define pve_get_ptep(pve) \
440 ((pve)->pve_ptep)
441
442 #define pve_set_ptep(pve, ptep_new) \
443 do { \
444 (pve)->pve_ptep = (ptep_new); \
445 } while (0)
446
447 /* PTEP Define Macros */
448
449 #if (__ARM_VMSA__ == 7)
450
451 #define ARM_PT_DESC_INDEX_MASK 0x00000
452 #define ARM_PT_DESC_INDEX_SHIFT 0
453
454 /*
455 * mask for page descriptor index: 4MB per page table
456 */
457 #define ARM_TT_PT_INDEX_MASK 0xfffU /* mask for page descriptor index: 4MB per page table */
458
459 /*
460 * Shift value used for reconstructing the virtual address for a PTE.
461 */
462 #define ARM_TT_PT_ADDR_SHIFT (10U)
463
464 #define ARM_PT_DESC_INDEX(ptep) \
465 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
466
467 #define ptep_get_ptd(ptep) \
468 ((struct pt_desc *)((*((vm_offset_t *)(pai_to_pvh(pa_index((vm_offset_t)(ptep) - gVirtBase + gPhysBase))))) & PVH_LIST_MASK))
469
470 #define ptep_get_va(ptep) \
471 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index((((vm_offset_t)(ptep) & ~0xFFF) - gVirtBase + gPhysBase))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
472
473 #define ptep_get_pmap(ptep) \
474 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index((((vm_offset_t)(ptep) & ~0xFFF) - gVirtBase + gPhysBase))))))->pmap))
475
476
477 #else
478
479 #if (ARM_PGSHIFT == 12)
480 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
481 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
482 /*
483 * mask for page descriptor index: 2MB per page table
484 */
485 #define ARM_TT_PT_INDEX_MASK (0x0fffULL)
486 /*
487 * Shift value used for reconstructing the virtual address for a PTE.
488 */
489 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
490
491 /* TODO: Give this a better name/documentation than "other" */
492 #define ARM_TT_PT_OTHER_MASK (0x0fffULL)
493
494 #else
495
496 #define ARM_PT_DESC_INDEX_MASK (0x00000)
497 #define ARM_PT_DESC_INDEX_SHIFT (0)
498 /*
499 * mask for page descriptor index: 32MB per page table
500 */
501 #define ARM_TT_PT_INDEX_MASK (0x3fffULL)
502 /*
503 * Shift value used for reconstructing the virtual address for a PTE.
504 */
505 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
506
507 /* TODO: Give this a better name/documentation than "other" */
508 #define ARM_TT_PT_OTHER_MASK (0x3fffULL)
509 #endif
510
511 #define ARM_PT_DESC_INDEX(ptep) \
512 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
513
514
515 #define ptep_get_ptd(ptep) \
516 ((struct pt_desc *)((*((vm_offset_t *)(pai_to_pvh(pa_index((vm_offset_t)(ptep) - gVirtBase + gPhysBase))))) & PVH_LIST_MASK))
517
518 #define ptep_get_va(ptep) \
519 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK) - gVirtBase + gPhysBase))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
520
521 #define ptep_get_pmap(ptep) \
522 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK) - gVirtBase + gPhysBase))))))->pmap))
523
524 #endif
525
526
527 /* PVH Define Macros */
528
529 /* pvhead type */
530 #define PVH_TYPE_NULL 0x0UL
531 #define PVH_TYPE_PVEP 0x1UL
532 #define PVH_TYPE_PTEP 0x2UL
533 #define PVH_TYPE_PTDP 0x3UL
534
535 #define PVH_TYPE_MASK (0x3UL)
536 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
537
538 #if (__ARM_VMSA__ == 7)
539 #define pvh_set_bits(h, b) \
540 do { \
541 while (!OSCompareAndSwap(*(vm_offset_t *)(h), *(vm_offset_t *)(h) | (b), (vm_offset_t *)(h))); \
542 } while (0)
543
544 #define pvh_clear_bits(h, b) \
545 do { \
546 while (!OSCompareAndSwap(*(vm_offset_t *)(h), *(vm_offset_t *)(h) & ~(b), (vm_offset_t *)(h))); \
547 } while (0)
548 #else
549 #define pvh_set_bits(h, b) \
550 do { \
551 while (!OSCompareAndSwap64(*(vm_offset_t *)(h), *(vm_offset_t *)(h) | ((int64_t)b), (vm_offset_t *)(h))); \
552 } while (0)
553
554 #define pvh_clear_bits(h, b) \
555 do { \
556 while (!OSCompareAndSwap64(*(vm_offset_t *)(h), *(vm_offset_t *)(h) & ~((int64_t)b), (vm_offset_t *)(h))); \
557 } while (0)
558 #endif
559
560 #define pvh_test_type(h, b) \
561 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
562
563 #define pvh_ptep(h) \
564 ((pt_entry_t *)(*(vm_offset_t *)(h) & PVH_LIST_MASK))
565
566 #define pvh_list(h) \
567 ((pv_entry_t *)(*(vm_offset_t *)(h) & PVH_LIST_MASK))
568
569 #define pvh_bits(h) \
570 (*(vm_offset_t *)(h) & PVH_TYPE_MASK)
571
572 #if (__ARM_VMSA__ == 7)
573 #define pvh_update_head(h, e, t) \
574 do { \
575 while (!OSCompareAndSwap(*(vm_offset_t *)(h), (vm_offset_t)(e) | (t), (vm_offset_t *)(h))); \
576 } while (0)
577 #else
578 #define pvh_update_head(h, e, t) \
579 do { \
580 while (!OSCompareAndSwap64(*(vm_offset_t *)(h), (vm_offset_t)(e) | (t), (vm_offset_t *)(h))); \
581 } while (0)
582 #endif
583
584 #define pvh_add(h, e) \
585 do { \
586 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
587 pve_next(e) = pvh_list(h); \
588 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
589 } while (0)
590
591 #define pvh_remove(h, p, e) \
592 do { \
593 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
594 if ((p) == (h)) { \
595 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
596 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
597 } else { \
598 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
599 } \
600 } else { \
601 /* \
602 * PMAP LEDGERS: \
603 * preserve the "alternate accounting" bit \
604 * when updating "p" (the previous entry's \
605 * "pve_next"). \
606 */ \
607 boolean_t __is_altacct; \
608 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
609 *(p) = PVE_NEXT_PTR(pve_next((e))); \
610 if (__is_altacct) { \
611 PVE_NEXT_SET_ALTACCT((p)); \
612 } else { \
613 PVE_NEXT_CLR_ALTACCT((p)); \
614 } \
615 } \
616 } while (0)
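/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * source): a pv_head_table entry keeps its type in the low two bits
 * (PVH_TYPE_*) and the payload in the remaining bits, so readers
 * dispatch on pvh_test_type() before using pvh_ptep() or pvh_list().
 */
static inline pt_entry_t *
pvh_first_ptep_example(unsigned int pai)
{
	pv_entry_t **pvh = pai_to_pvh(pai);

	if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
		/* Single mapping: the PTE pointer is stored inline. */
		return pvh_ptep(pvh);
	} else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
		/* Multiple mappings: take the first pv_entry on the list. */
		return pve_get_ptep(pvh_list(pvh));
	}
	return (pt_entry_t *) 0;	/* PVH_TYPE_NULL or PVH_TYPE_PTDP */
}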
617
618
619 /* PPATTR Define Macros */
620
621 #define ppattr_set_bits(h, b) \
622 do { \
623 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
624 } while (0)
625
626 #define ppattr_clear_bits(h, b) \
627 do { \
628 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
629 } while (0)
630
631 #define ppattr_test_bits(h, b) \
632 ((*(pp_attr_t *)(h) & (b)) == (b))
633
634 #define pa_set_bits(x, b) \
635 do { \
636 if (pa_valid(x)) \
637 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
638 (b)); \
639 } while (0)
640
641 #define pa_test_bits(x, b) \
642 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
643 (b)) : FALSE)
644
645 #define pa_clear_bits(x, b) \
646 do { \
647 if (pa_valid(x)) \
648 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
649 (b)); \
650 } while (0)
651
652 #define pa_set_modify(x) \
653 pa_set_bits(x, PP_ATTR_MODIFIED)
654
655 #define pa_clear_modify(x) \
656 pa_clear_bits(x, PP_ATTR_MODIFIED)
657
658 #define pa_set_reference(x) \
659 pa_set_bits(x, PP_ATTR_REFERENCED)
660
661 #define pa_clear_reference(x) \
662 pa_clear_bits(x, PP_ATTR_REFERENCED)
663
664
665 #define IS_INTERNAL_PAGE(pai) \
666 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
667 #define SET_INTERNAL_PAGE(pai) \
668 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
669 #define CLR_INTERNAL_PAGE(pai) \
670 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
671
672 #define IS_REUSABLE_PAGE(pai) \
673 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
674 #define SET_REUSABLE_PAGE(pai) \
675 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
676 #define CLR_REUSABLE_PAGE(pai) \
677 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
678
679 #define IS_ALTACCT_PAGE(pai, pve_p) \
680 (((pve_p) == NULL) \
681 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
682 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
683 #define SET_ALTACCT_PAGE(pai, pve_p) \
684 if ((pve_p) == NULL) { \
685 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
686 } else { \
687 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
688 }
689 #define CLR_ALTACCT_PAGE(pai, pve_p) \
690 if ((pve_p) == NULL) { \
691 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT);\
692 } else { \
693 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
694 }
695
696 #define IS_REFFAULT_PAGE(pai) \
697 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
698 #define SET_REFFAULT_PAGE(pai) \
699 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
700 #define CLR_REFFAULT_PAGE(pai) \
701 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
702
703 #define IS_MODFAULT_PAGE(pai) \
704 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
705 #define SET_MODFAULT_PAGE(pai) \
706 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
707 #define CLR_MODFAULT_PAGE(pai) \
708 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
709
710
711 #if (__ARM_VMSA__ == 7)
712
713 #define tte_index(pmap, addr) \
714 ttenum((addr))
715
716 #define tte_get_ptd(tte) \
717 ((struct pt_desc *)((*((vm_offset_t *)(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK)))))) & PVH_LIST_MASK))
718
719 #else
720
721 #define tt0_index(pmap, addr) \
722 (((addr) & ARM_TT_L0_INDEX_MASK) >> ARM_TT_L0_SHIFT)
723
724 #define tt1_index(pmap, addr) \
725 (((addr) & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT)
726
727 #define tt2_index(pmap, addr) \
728 (((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)
729
730 #define tt3_index(pmap, addr) \
731 (((addr) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)
732
733 #define tte_index(pmap, addr) \
734 (((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)
735
736 #define tte_get_ptd(tte) \
737 ((struct pt_desc *)((*((vm_offset_t *)(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK)))))) & PVH_LIST_MASK))
738
739 #endif
740
741 /*
742 * Lock on pmap system
743 */
744
745 #define PMAP_LOCK_INIT(pmap) { \
746 simple_lock_init(&(pmap)->lock, 0); \
747 }
748
749 #define PMAP_LOCK(pmap) { \
750 simple_lock(&(pmap)->lock); \
751 }
752
753 #define PMAP_UNLOCK(pmap) { \
754 simple_unlock(&(pmap)->lock); \
755 }
756
757 #if MACH_ASSERT
758 #define PMAP_ASSERT_LOCKED(pmap) { \
759 simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
760 }
761 #else
762 #define PMAP_ASSERT_LOCKED(pmap)
763 #endif
764
765 /*
766 * Each entry in the pv_head_table is locked by a bit in the
767 * pv lock array, which is stored in the region preceding pv_head_table.
768 * The lock bits are accessed by the physical address of the page they lock.
769 */
770 #define LOCK_PVH(index) { \
771 hw_lock_bit((hw_lock_bit_t *) \
772 ((unsigned int*)pv_head_table)-1-(index>>5), \
773 (index&0x1F)); \
774 }
775
776 #define UNLOCK_PVH(index) { \
777 hw_unlock_bit((hw_lock_bit_t *) \
778 ((unsigned int*)pv_head_table)-1-(index>>5), \
779 (index&0x1F)); \
780 }
781
782 #define ASSERT_PVH_LOCKED(index) { \
783 assert(*(((unsigned int*)pv_head_table)-1-(index>>5)) & (1 << (index & 0x1F))); \
784 }
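/*
 * Illustrative restatement (not part of the original source) of the
 * LOCK_PVH()/UNLOCK_PVH() addressing above: the lock bit for physical
 * page index "pai" lives in the array of 32-bit words immediately
 * preceding pv_head_table, one bit per page.
 */
static inline hw_lock_bit_t *
pvh_lock_word_example(unsigned int pai)
{
	/* Word (pai / 32), counted backwards from pv_head_table; the bit
	 * within that word is (pai % 32), as passed to hw_lock_bit(). */
	return (hw_lock_bit_t *)(((unsigned int *) pv_head_table) - 1 - (pai >> 5));
}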
785
786 #define PMAP_UPDATE_TLBS(pmap, s, e) { \
787 flush_mmu_tlb_region_asid(s, (unsigned)(e - s), pmap); \
788 }
789
790 #ifdef __ARM_L1_PTW__
791
792 #define FLUSH_PTE_RANGE(spte, epte) \
793 __asm__ volatile("dsb ish");
794
795 #define FLUSH_PTE(pte_p) \
796 __asm__ volatile("dsb ish");
797
798 #else
799
800 #define FLUSH_PTE_RANGE(spte, epte) \
801 CleanPoU_DcacheRegion((vm_offset_t)spte, \
802 (vm_offset_t)epte - (vm_offset_t)spte);
803
804 #define FLUSH_PTE(pte_p) \
805 CleanPoU_DcacheRegion((vm_offset_t)pte_p, sizeof(pt_entry_t));
806 #endif
807
808 #define WRITE_PTE(pte_p, pte_entry) \
809 __unreachable_ok_push \
810 if (TEST_PAGE_RATIO_4) { \
811 do { \
812 if (((unsigned)(pte_p)) & 0x1f) panic("WRITE_PTE\n"); \
813 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
814 *(pte_p) = (pte_entry); \
815 *((pte_p)+1) = (pte_entry); \
816 *((pte_p)+2) = (pte_entry); \
817 *((pte_p)+3) = (pte_entry); \
818 } else { \
819 *(pte_p) = (pte_entry); \
820 *((pte_p)+1) = (pte_entry) | 0x1000; \
821 *((pte_p)+2) = (pte_entry) | 0x2000; \
822 *((pte_p)+3) = (pte_entry) | 0x3000; \
823 } \
824 FLUSH_PTE_RANGE((pte_p),((pte_p)+4)); \
825 } while(0); \
826 } else { \
827 do { \
828 *(pte_p) = (pte_entry); \
829 FLUSH_PTE(pte_p); \
830 } while(0); \
831 } \
832 __unreachable_ok_pop
833
834 #define WRITE_PTE_FAST(pte_p, pte_entry) \
835 __unreachable_ok_push \
836 if (TEST_PAGE_RATIO_4) { \
837 if (((unsigned)(pte_p)) & 0x1f) panic("WRITE_PTE\n"); \
838 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
839 *(pte_p) = (pte_entry); \
840 *((pte_p)+1) = (pte_entry); \
841 *((pte_p)+2) = (pte_entry); \
842 *((pte_p)+3) = (pte_entry); \
843 } else { \
844 *(pte_p) = (pte_entry); \
845 *((pte_p)+1) = (pte_entry) | 0x1000; \
846 *((pte_p)+2) = (pte_entry) | 0x2000; \
847 *((pte_p)+3) = (pte_entry) | 0x3000; \
848 } \
849 } else { \
850 *(pte_p) = (pte_entry); \
851 } \
852 __unreachable_ok_pop
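/*
 * Note (added commentary, not in the original source): TEST_PAGE_RATIO_4
 * in the WRITE_PTE()/WRITE_PTE_FAST() macros above covers configurations
 * where the VM page size is four times the 4KB hardware page size; one
 * logical mapping is then written as four consecutive hardware PTEs whose
 * output addresses step by 0x1000, while empty/compressed entries are
 * simply replicated, and the four-PTE group must be 32-byte aligned.
 */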
853
854
855 /*
856 * Other useful macros.
857 */
858 #define current_pmap() \
859 (vm_map_pmap(current_thread()->map))
860
861 #define PMAP_IS_VALID(x) (TRUE)
862
863 #ifdef PMAP_TRACES
864 unsigned int pmap_trace = 0;
865
866 #define PMAP_TRACE(...) \
867 if (pmap_trace) { \
868 KDBG_RELEASE(__VA_ARGS__); \
869 }
870 #else
871 #define PMAP_TRACE(...) KDBG_DEBUG(__VA_ARGS__)
872 #endif
873
874 #define PMAP_TRACE_CONSTANT(...) KDBG_RELEASE(__VA_ARGS__)
875
876 /*
877 * Internal function prototypes (forward declarations).
878 */
879
880 static void pv_init(
881 void);
882
883 static boolean_t pv_alloc(
884 pmap_t pmap,
885 unsigned int pai,
886 pv_entry_t **pvepp);
887
888 static void pv_free(
889 pv_entry_t *pvep);
890
891 static void pv_list_free(
892 pv_entry_t *pvehp,
893 pv_entry_t *pvetp,
894 unsigned int cnt);
895
896 static void ptd_bootstrap(
897 pt_desc_t *ptdp, unsigned int ptd_cnt);
898
899 static pt_desc_t *ptd_alloc(
900 pmap_t pmap);
901
902 static void ptd_deallocate(
903 pt_desc_t *ptdp);
904
905 static void ptd_init(
906 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
907
908 static void pmap_zone_init(
909 void);
910
911 static void pmap_set_reference(
912 ppnum_t pn);
913
914 ppnum_t pmap_vtophys(
915 pmap_t pmap, addr64_t va);
916
917 void pmap_switch_user_ttb(
918 pmap_t pmap);
919
920 static void flush_mmu_tlb_region_asid(
921 vm_offset_t va, unsigned length, pmap_t pmap);
922
923 static kern_return_t pmap_expand(
924 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
925
926 static int pmap_remove_range(
927 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
928
929 static int pmap_remove_range_options(
930 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, int);
931
932 static tt_entry_t *pmap_tt1_allocate(
933 pmap_t, vm_size_t, unsigned int);
934
935 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
936
937 static void pmap_tt1_deallocate(
938 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
939
940 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
941
942 static kern_return_t pmap_tt_allocate(
943 pmap_t, tt_entry_t **, unsigned int, unsigned int);
944
945 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
946
947 static void pmap_tte_deallocate(
948 pmap_t, tt_entry_t *, unsigned int);
949
950 #define PMAP_TT_L1_LEVEL 0x1
951 #define PMAP_TT_L2_LEVEL 0x2
952 #define PMAP_TT_L3_LEVEL 0x3
953 #if (__ARM_VMSA__ == 7)
954 #define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
955 #else
956 #define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
957 #endif
958
959 #ifdef __ARM64_PMAP_SUBPAGE_L1__
960 #if (__ARM_VMSA__ <= 7)
961 #error This is not supported for old-style page tables
962 #endif
963 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
964 #else
965 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
966 #endif
967
968 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
969 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
970 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
971
972 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
973
974 void pmap_init_pte_page_internal(
975 pmap_t, pt_entry_t *, vm_offset_t, unsigned int , pt_desc_t **);
976
977
978 #if (__ARM_VMSA__ > 7)
979
980 static inline tt_entry_t *pmap_tt1e(
981 pmap_t, vm_map_address_t);
982
983 static inline tt_entry_t *pmap_tt2e(
984 pmap_t, vm_map_address_t);
985
986 static inline pt_entry_t *pmap_tt3e(
987 pmap_t, vm_map_address_t);
988
989 static void pmap_unmap_sharedpage(
990 pmap_t pmap);
991
992 static void pmap_sharedpage_flush_32_to_64(
993 void);
994
995 static boolean_t
996 pmap_is_64bit(pmap_t);
997
998
999 #endif
1000 static inline tt_entry_t *pmap_tte(
1001 pmap_t, vm_map_address_t);
1002
1003 static inline pt_entry_t *pmap_pte(
1004 pmap_t, vm_map_address_t);
1005
1006 static void pmap_update_cache_attributes_locked(
1007 ppnum_t, unsigned);
1008
1009 boolean_t arm_clear_fast_fault(
1010 ppnum_t ppnum,
1011 vm_prot_t fault_type);
1012
1013 static pmap_paddr_t pmap_pages_reclaim(
1014 void);
1015
1016 static kern_return_t pmap_pages_alloc(
1017 pmap_paddr_t *pa,
1018 unsigned size,
1019 unsigned option);
1020
1021 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1022 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1023
1024 static void pmap_pages_free(
1025 pmap_paddr_t pa,
1026 unsigned size);
1027
1028
1029 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1030 static __return_type __function_name##_internal __function_args;
1031
1032 PMAP_SUPPORT_PROTOTYPES(
1033 kern_return_t,
1034 arm_fast_fault, (pmap_t pmap,
1035 vm_map_address_t va,
1036 vm_prot_t fault_type,
1037 boolean_t from_user), ARM_FAST_FAULT_INDEX);
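/*
 * Illustrative expansion (not part of the original source): given the
 * PMAP_SUPPORT_PROTOTYPES() definition above, the arm_fast_fault
 * invocation declares roughly
 *
 *	static kern_return_t arm_fast_fault_internal(pmap_t pmap,
 *		vm_map_address_t va, vm_prot_t fault_type,
 *		boolean_t from_user);
 *
 * the __function_index argument (ARM_FAST_FAULT_INDEX) is unused by the
 * macro in this configuration.
 */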
1038
1039
1040 PMAP_SUPPORT_PROTOTYPES(
1041 boolean_t,
1042 arm_force_fast_fault, (ppnum_t ppnum,
1043 vm_prot_t allow_mode,
1044 int options), ARM_FORCE_FAST_FAULT_INDEX);
1045
1046 PMAP_SUPPORT_PROTOTYPES(
1047 kern_return_t,
1048 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
1049
1050 PMAP_SUPPORT_PROTOTYPES(
1051 kern_return_t,
1052 mapping_replenish, (void), MAPPING_REPLENISH_INDEX);
1053
1054 PMAP_SUPPORT_PROTOTYPES(
1055 boolean_t,
1056 pmap_batch_set_cache_attributes, (ppnum_t pn,
1057 unsigned int cacheattr,
1058 unsigned int page_cnt,
1059 unsigned int page_index,
1060 boolean_t doit,
1061 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
1062
1063 PMAP_SUPPORT_PROTOTYPES(
1064 void,
1065 pmap_change_wiring, (pmap_t pmap,
1066 vm_map_address_t v,
1067 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
1068
1069 PMAP_SUPPORT_PROTOTYPES(
1070 pmap_t,
1071 pmap_create, (ledger_t ledger,
1072 vm_map_size_t size,
1073 boolean_t is_64bit), PMAP_CREATE_INDEX);
1074
1075 PMAP_SUPPORT_PROTOTYPES(
1076 void,
1077 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
1078
1079
1080
1081 PMAP_SUPPORT_PROTOTYPES(
1082 kern_return_t,
1083 pmap_enter_options, (pmap_t pmap,
1084 vm_map_address_t v,
1085 ppnum_t pn,
1086 vm_prot_t prot,
1087 vm_prot_t fault_type,
1088 unsigned int flags,
1089 boolean_t wired,
1090 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
1091
1092 PMAP_SUPPORT_PROTOTYPES(
1093 vm_offset_t,
1094 pmap_extract, (pmap_t pmap,
1095 vm_map_address_t va), PMAP_EXTRACT_INDEX);
1096
1097 PMAP_SUPPORT_PROTOTYPES(
1098 ppnum_t,
1099 pmap_find_phys, (pmap_t pmap,
1100 addr64_t va), PMAP_FIND_PHYS_INDEX);
1101
1102 #if (__ARM_VMSA__ > 7)
1103 PMAP_SUPPORT_PROTOTYPES(
1104 void,
1105 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
1106 #endif
1107
1108
1109 PMAP_SUPPORT_PROTOTYPES(
1110 boolean_t,
1111 pmap_is_empty, (pmap_t pmap,
1112 vm_map_offset_t va_start,
1113 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
1114
1115
1116 PMAP_SUPPORT_PROTOTYPES(
1117 unsigned int,
1118 pmap_map_cpu_windows_copy, (ppnum_t pn,
1119 vm_prot_t prot,
1120 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
1121
1122 PMAP_SUPPORT_PROTOTYPES(
1123 kern_return_t,
1124 pmap_nest, (pmap_t grand,
1125 pmap_t subord,
1126 addr64_t vstart,
1127 addr64_t nstart,
1128 uint64_t size), PMAP_NEST_INDEX);
1129
1130 PMAP_SUPPORT_PROTOTYPES(
1131 void,
1132 pmap_page_protect_options, (ppnum_t ppnum,
1133 vm_prot_t prot,
1134 unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
1135
1136 PMAP_SUPPORT_PROTOTYPES(
1137 void,
1138 pmap_protect_options, (pmap_t pmap,
1139 vm_map_address_t start,
1140 vm_map_address_t end,
1141 vm_prot_t prot,
1142 unsigned int options,
1143 void *args), PMAP_PROTECT_OPTIONS_INDEX);
1144
1145 PMAP_SUPPORT_PROTOTYPES(
1146 kern_return_t,
1147 pmap_query_page_info, (pmap_t pmap,
1148 vm_map_offset_t va,
1149 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
1150
1151 PMAP_SUPPORT_PROTOTYPES(
1152 boolean_t,
1153 pmap_query_resident, (pmap_t pmap,
1154 vm_map_address_t start,
1155 vm_map_address_t end,
1156 mach_vm_size_t *resident_bytes_p,
1157 mach_vm_size_t *compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
1158
1159 PMAP_SUPPORT_PROTOTYPES(
1160 void,
1161 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
1162
1163 PMAP_SUPPORT_PROTOTYPES(
1164 int,
1165 pmap_remove_options, (pmap_t pmap,
1166 vm_map_address_t start,
1167 vm_map_address_t end,
1168 int options), PMAP_REMOVE_OPTIONS_INDEX);
1169
1170 PMAP_SUPPORT_PROTOTYPES(
1171 kern_return_t,
1172 pmap_return, (boolean_t do_panic,
1173 boolean_t do_recurse), PMAP_RETURN_INDEX);
1174
1175 PMAP_SUPPORT_PROTOTYPES(
1176 void,
1177 pmap_set_cache_attributes, (ppnum_t pn,
1178 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
1179
1180 PMAP_SUPPORT_PROTOTYPES(
1181 void,
1182 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
1183
1184 #if MACH_ASSERT
1185 PMAP_SUPPORT_PROTOTYPES(
1186 void,
1187 pmap_set_process, (pmap_t pmap,
1188 int pid,
1189 char *procname), PMAP_SET_PROCESS_INDEX);
1190 #endif
1191
1192
1193 PMAP_SUPPORT_PROTOTYPES(
1194 void,
1195 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
1196
1197 PMAP_SUPPORT_PROTOTYPES(
1198 kern_return_t,
1199 pmap_unnest_options, (pmap_t grand,
1200 addr64_t vaddr,
1201 uint64_t size,
1202 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
1203
1204
1205 PMAP_SUPPORT_PROTOTYPES(
1206 void,
1207 phys_attribute_set, (ppnum_t pn,
1208 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
1209
1210
1211 PMAP_SUPPORT_PROTOTYPES(
1212 void,
1213 phys_attribute_clear, (ppnum_t pn,
1214 unsigned int bits,
1215 int options,
1216 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
1217
1218 PMAP_SUPPORT_PROTOTYPES(
1219 void,
1220 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
1221
1222 PMAP_SUPPORT_PROTOTYPES(
1223 void,
1224 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
1225
1226
1227
1228 void pmap_footprint_suspend(vm_map_t map,
1229 boolean_t suspend);
1230 PMAP_SUPPORT_PROTOTYPES(
1231 void,
1232 pmap_footprint_suspend, (vm_map_t map,
1233 boolean_t suspend),
1234 PMAP_FOOTPRINT_SUSPEND_INDEX);
1235
1236 #if CONFIG_PGTRACE
1237 boolean_t pgtrace_enabled = 0;
1238
1239 typedef struct {
1240 queue_chain_t chain;
1241
1242 /*
1243 pmap - pmap for below addresses
1244 ova - original va page address
1245 cva - clone va addresses for pre, target and post pages
1246 cva_spte - clone saved ptes
1247 range - trace range in this map
1248 cloned - has been cloned or not
1249 */
1250 pmap_t pmap;
1251 vm_map_offset_t ova;
1252 vm_map_offset_t cva[3];
1253 pt_entry_t cva_spte[3];
1254 struct {
1255 pmap_paddr_t start;
1256 pmap_paddr_t end;
1257 } range;
1258 bool cloned;
1259 } pmap_pgtrace_map_t;
1260
1261 static void pmap_pgtrace_init(void);
1262 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
1263 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
1264 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
1265 #endif
1266
1267 #if (__ARM_VMSA__ > 7)
1268 /*
1269 * The low global vector page is mapped at a fixed alias.
1270 * Since the page size is 16k for H8 and newer we map the globals to a 16k
1271 * aligned address. Readers of the globals (e.g. lldb, panic server) need
1272 * to check both addresses anyway for backward compatibility. So for now
1273 * we leave H6 and H7 where they were.
1274 */
1275 #if (ARM_PGSHIFT == 14)
1276 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
1277 #else
1278 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
1279 #endif
1280
1281 #else
1282 #define LOWGLOBAL_ALIAS (0xFFFF1000)
1283 #endif
1284
1285 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1286 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1287 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1288 long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
1289
1290 int pt_fake_zone_index = -1; /* index of pmap fake zone */
1291
1292
1293
1294 /*
1295 * Initializes the per-CPU pmap data structure for the given CPU number.
1296 */
1297 static void
1298 pmap_cpu_data_init_internal(unsigned int cpu_number)
1299 {
1300 pmap_cpu_data_t * pmap_cpu_data = NULL;
1301
1302 pmap_cpu_data = pmap_get_cpu_data();
1303 pmap_cpu_data->cpu_number = cpu_number;
1304 }
1305
1306 void
1307 pmap_cpu_data_init(void)
1308 {
1309 pmap_cpu_data_init_internal(cpu_number());
1310 }
1311
1312 static void
1313 pmap_cpu_data_array_init(void)
1314 {
1315
1316 pmap_cpu_data_init();
1317 }
1318
1319 pmap_cpu_data_t *
1320 pmap_get_cpu_data(void)
1321 {
1322 pmap_cpu_data_t * pmap_cpu_data = NULL;
1323
1324 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
1325
1326 return pmap_cpu_data;
1327 }
1328
1329
1330 /* TODO */
1331 pmap_paddr_t
1332 pmap_pages_reclaim(
1333 void)
1334 {
1335 boolean_t found_page;
1336 unsigned i;
1337 pt_desc_t *ptdp;
1338
1339
1340 /*
1341 * pmap_pages_reclaim() returns a page by freeing an active pt page.
1342 * To be eligible, a pt page must be assigned to a user pmap, have no
1343 * wired pte entries, and contain at least one valid pte entry.
1344 *
1345 * In a loop, check for a page in the reclaimed pt page list.
1346 * If one is present, unlink that page and return its physical address.
1347 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
1348 * If one is found, invoke pmap_remove_range() on its pmap and address
1349 * range, then deallocate that pt page. This ends up adding the pt page
1350 * to the reclaimed pt page list.
1351 * If no eligible page is found in the pt page list, panic.
1352 */
1353
1354 simple_lock(&pmap_pages_lock);
1355 pmap_pages_request_count++;
1356 pmap_pages_request_acum++;
1357
1358 while (1) {
1359
1360 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
1361 page_free_entry_t *page_entry;
1362
1363 page_entry = pmap_pages_reclaim_list;
1364 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
1365 simple_unlock(&pmap_pages_lock);
1366
1367 return((pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry));
1368 }
1369
1370 simple_unlock(&pmap_pages_lock);
1371
1372 simple_lock(&pt_pages_lock);
1373 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
1374 found_page = FALSE;
1375
1376 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
1377 if ((ptdp->pmap != kernel_pmap)
1378 && (ptdp->pmap->nested == FALSE)
1379 && (simple_lock_try(&ptdp->pmap->lock))) {
1380
1381 unsigned refcnt_acc = 0;
1382 unsigned wiredcnt_acc = 0;
1383
1384 for (i = 0 ; i < PT_INDEX_MAX ; i++) {
1385 if (ptdp->pt_cnt[i].refcnt & PT_DESC_REFCOUNT) {
1386 /* Do not attempt to free a page that contains an L2 table
1387 * or is currently being operated on by pmap_enter(),
1388 * which can drop the pmap lock. */
1389 refcnt_acc = 0;
1390 break;
1391 }
1392 refcnt_acc += ptdp->pt_cnt[i].refcnt;
1393 wiredcnt_acc += ptdp->pt_cnt[i].wiredcnt;
1394 }
1395 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
1396 found_page = TRUE;
1397 /* Leave ptdp->pmap locked here. We're about to reclaim
1398 * a tt page from it, so we don't want anyone else messing
1399 * with it while we do that. */
1400 break;
1401 }
1402 simple_unlock(&ptdp->pmap->lock);
1403 }
1404 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
1405 }
1406 if (!found_page) {
1407 panic("pmap_pages_reclaim(): No eligible page in pt_page_list\n");
1408 } else {
1409 int remove_count = 0;
1410 vm_map_address_t va;
1411 pmap_t pmap;
1412 pt_entry_t *bpte, *epte;
1413 pt_entry_t *pte_p;
1414 tt_entry_t *tte_p;
1415 uint32_t rmv_spte=0;
1416
1417 simple_unlock(&pt_pages_lock);
1418 pmap = ptdp->pmap;
1419 PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
1420 for (i = 0 ; i < PT_INDEX_MAX ; i++) {
1421 va = ptdp->pt_map[i].va;
1422
1423 tte_p = pmap_tte(pmap, va);
1424 if ((tte_p != (tt_entry_t *) NULL)
1425 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
1426
1427 #if (__ARM_VMSA__ == 7)
1428 pte_p = (pt_entry_t *) ttetokv(*tte_p);
1429 bpte = &pte_p[ptenum(va)];
1430 epte = bpte + PAGE_SIZE/sizeof(pt_entry_t);
1431 #else
1432 pte_p = (pt_entry_t *) ttetokv(*tte_p);
1433 bpte = &pte_p[tt3_index(pmap, va)];
1434 epte = bpte + PAGE_SIZE/sizeof(pt_entry_t);
1435 #endif
1436 /*
1437 * Use PMAP_OPTIONS_REMOVE to clear any
1438 * "compressed" markers and update the
1439 * "compressed" counter in pmap->stats.
1440 * This means that we lose accounting for
1441 * any compressed pages in this range
1442 * but the alternative is to not be able
1443 * to account for their future decompression,
1444 * which could cause the counter to drift
1445 * more and more.
1446 */
1447 remove_count += pmap_remove_range_options(
1448 pmap, va, bpte, epte,
1449 &rmv_spte, PMAP_OPTIONS_REMOVE);
1450 if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0)
1451 panic("pmap_pages_reclaim(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
1452 #if (__ARM_VMSA__ == 7)
1453 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
1454 flush_mmu_tlb_entry((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->asid & 0xff));
1455 flush_mmu_tlb_entry(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->asid & 0xff));
1456 flush_mmu_tlb_entry(((va & ~ARM_TT_L1_PT_OFFMASK) + 2*ARM_TT_L1_SIZE)| (pmap->asid & 0xff));
1457 flush_mmu_tlb_entry(((va & ~ARM_TT_L1_PT_OFFMASK) + 3*ARM_TT_L1_SIZE)| (pmap->asid & 0xff));
1458 #else
1459 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
1460 flush_mmu_tlb_entry(tlbi_addr(va & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
1461 #endif
1462
1463 if (remove_count > 0) {
1464 #if (__ARM_VMSA__ == 7)
1465 PMAP_UPDATE_TLBS(pmap, va, va+4*ARM_TT_L1_SIZE);
1466 #else
1467 PMAP_UPDATE_TLBS(pmap, va, va+ARM_TT_L2_SIZE);
1468 #endif
1469 }
1470 }
1471 }
1472 // Undo the lock we grabbed when we found ptdp above
1473 PMAP_UNLOCK(pmap);
1474 }
1475 simple_lock(&pmap_pages_lock);
1476 }
1477 }
1478
1479
1480 static kern_return_t
1481 pmap_pages_alloc(
1482 pmap_paddr_t *pa,
1483 unsigned size,
1484 unsigned option)
1485 {
1486 vm_page_t m = VM_PAGE_NULL, m_prev;
1487
1488 if(option & PMAP_PAGES_RECLAIM_NOWAIT) {
1489 assert(size == PAGE_SIZE);
1490 *pa = pmap_pages_reclaim();
1491 return KERN_SUCCESS;
1492 }
1493 if (size == PAGE_SIZE) {
1494 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
1495 if(option & PMAP_PAGES_ALLOCATE_NOWAIT) {
1496 return KERN_RESOURCE_SHORTAGE;
1497 }
1498
1499 VM_PAGE_WAIT();
1500 }
1501 vm_page_lock_queues();
1502 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
1503 vm_page_unlock_queues();
1504 }
1505 if (size == 2*PAGE_SIZE) {
1506 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
1507 if(option & PMAP_PAGES_ALLOCATE_NOWAIT)
1508 return KERN_RESOURCE_SHORTAGE;
1509
1510 VM_PAGE_WAIT();
1511 }
1512 }
1513
1514 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
1515
1516 vm_object_lock(pmap_object);
1517 while (m != VM_PAGE_NULL) {
1518 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
1519 m_prev = m;
1520 m = NEXT_PAGE(m_prev);
1521 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
1522 }
1523 vm_object_unlock(pmap_object);
1524
1525 OSAddAtomic(size>>PAGE_SHIFT, &inuse_pmap_pages_count);
1526 OSAddAtomic64(size>>PAGE_SHIFT, &alloc_pmap_pages_count);
1527
1528 return KERN_SUCCESS;
1529 }
1530
1531
1532 static void
1533 pmap_pages_free(
1534 pmap_paddr_t pa,
1535 unsigned size)
1536 {
1537 simple_lock(&pmap_pages_lock);
1538
1539 if (pmap_pages_request_count != 0) {
1540 page_free_entry_t *page_entry;
1541
1542 pmap_pages_request_count--;
1543 page_entry = (page_free_entry_t *)phystokv(pa);
1544 page_entry->next = pmap_pages_reclaim_list;
1545 pmap_pages_reclaim_list = page_entry;
1546 simple_unlock(&pmap_pages_lock);
1547
1548 return;
1549 }
1550
1551 simple_unlock(&pmap_pages_lock);
1552
1553 vm_page_t m;
1554 pmap_paddr_t pa_max;
1555
1556 OSAddAtomic(-(size>>PAGE_SHIFT), &inuse_pmap_pages_count);
1557
1558 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
1559 vm_object_lock(pmap_object);
1560 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
1561 assert(m != VM_PAGE_NULL);
1562 assert(VM_PAGE_WIRED(m));
1563 vm_page_lock_queues();
1564 vm_page_free(m);
1565 vm_page_unlock_queues();
1566 vm_object_unlock(pmap_object);
1567 }
1568 }
1569
1570 static inline void
1571 PMAP_ZINFO_PALLOC(
1572 pmap_t pmap, int bytes)
1573 {
1574 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
1575 }
1576
1577 static inline void
1578 PMAP_ZINFO_PFREE(
1579 pmap_t pmap,
1580 int bytes)
1581 {
1582 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
1583 }
1584
1585 static inline void
1586 pmap_tt_ledger_credit(
1587 pmap_t pmap,
1588 vm_size_t size)
1589 {
1590 if (pmap != kernel_pmap) {
1591 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
1592 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
1593 }
1594 }
1595
1596 static inline void
1597 pmap_tt_ledger_debit(
1598 pmap_t pmap,
1599 vm_size_t size)
1600 {
1601 if (pmap != kernel_pmap) {
1602 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
1603 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
1604 }
1605 }
1606
1607 static unsigned int
1608 alloc_asid(
1609 void)
1610 {
1611 unsigned int asid_bitmap_index;
1612
1613 simple_lock(&pmaps_lock);
1614 for (asid_bitmap_index = 0; asid_bitmap_index < (MAX_ASID / (sizeof(uint32_t) * NBBY)); asid_bitmap_index++) {
1615 unsigned int temp = ffs(asid_bitmap[asid_bitmap_index]);
1616 if (temp > 0) {
1617 temp -= 1;
1618 asid_bitmap[asid_bitmap_index] &= ~(1 << temp);
1619 #if __ARM_KERNEL_PROTECT__
1620 /*
1621 * We need two ASIDs: n and (n | 1). n is used for EL0,
1622 * (n | 1) for EL1.
1623 */
1624 unsigned int temp2 = temp | 1;
1625 assert(temp2 < MAX_ASID);
1626 assert(temp2 < 32);
1627 assert(temp2 != temp);
1628 assert(asid_bitmap[asid_bitmap_index] & (1 << temp2));
1629
1630 /* Grab the second ASID. */
1631 asid_bitmap[asid_bitmap_index] &= ~(1 << temp2);
1632 #endif /* __ARM_KERNEL_PROTECT__ */
1633 simple_unlock(&pmaps_lock);
1634
1635 /*
1636 * We should never vend out physical ASID 0 through this
1637 * method, as it belongs to the kernel.
1638 */
1639 assert(((asid_bitmap_index * sizeof(uint32_t) * NBBY + temp) % ARM_MAX_ASID) != 0);
1640
1641 #if __ARM_KERNEL_PROTECT__
1642 /* Or the kernel EL1 ASID. */
1643 assert(((asid_bitmap_index * sizeof(uint32_t) * NBBY + temp) % ARM_MAX_ASID) != 1);
1644 #endif /* __ARM_KERNEL_PROTECT__ */
1645
1646 return (asid_bitmap_index * sizeof(uint32_t) * NBBY + temp);
1647 }
1648 }
1649 simple_unlock(&pmaps_lock);
1650 /*
1651 * ToDo: Add code to deal with a pmap that has no ASID; panic for now.
1652 * Not an issue with the small-config process hard limit.
1653 */
1654 panic("alloc_asid(): out of ASID number");
1655 return MAX_ASID;
1656 }
1657
1658 static void
1659 free_asid(
1660 int asid)
1661 {
1662 /* Don't free up any alias of physical ASID 0. */
1663 assert((asid % ARM_MAX_ASID) != 0);
1664
1665 simple_lock(&pmaps_lock);
1666 setbit(asid, (int *) asid_bitmap);
1667
1668 #if __ARM_KERNEL_PROTECT__
1669 assert((asid | 1) < MAX_ASID);
1670 assert((asid | 1) != asid);
1671 setbit(asid | 1, (int *) asid_bitmap);
1672 #endif /* __ARM_KERNEL_PROTECT__ */
1673
1674 simple_unlock(&pmaps_lock);
1675 }
1676
1677 #define PV_LOW_WATER_MARK_DEFAULT 0x200
1678 #define PV_KERN_LOW_WATER_MARK_DEFAULT 0x200
1679 #define PV_ALLOC_CHUNK_INITIAL 0x200
1680 #define PV_KERN_ALLOC_CHUNK_INITIAL 0x200
1681 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
1682 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
1683
1684
1685 uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
1686 uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
1687 uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;
1688
1689 uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
1690 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
1691 uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
1692 uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;
1693
1694 thread_t mapping_replenish_thread;
1695 event_t mapping_replenish_event;
1696 event_t pmap_user_pv_throttle_event;
1697 volatile uint32_t mappingrecurse = 0;
1698
1699 uint64_t pmap_pv_throttle_stat;
1700 uint64_t pmap_pv_throttled_waiters;
1701
1702 unsigned pmap_mapping_thread_wakeups;
1703 unsigned pmap_kernel_reserve_replenish_stat MARK_AS_PMAP_DATA;
1704 unsigned pmap_user_reserve_replenish_stat MARK_AS_PMAP_DATA;
1705 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
1706
1707
1708 static void
1709 pv_init(
1710 void)
1711 {
1712 simple_lock_init(&pv_free_list_lock, 0);
1713 simple_lock_init(&pv_kern_free_list_lock, 0);
1714 pv_free_list = PV_ENTRY_NULL;
1715 pv_free_count = 0x0U;
1716 pv_kern_free_list = PV_ENTRY_NULL;
1717 pv_kern_free_count = 0x0U;
1718 }
1719
1720 static inline void PV_ALLOC(pv_entry_t **pv_ep);
1721 static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
1722 static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
1723 static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
1724
1725 static inline void pmap_pv_throttle(pmap_t p);
1726
1727 static boolean_t
1728 pv_alloc(
1729 pmap_t pmap,
1730 unsigned int pai,
1731 pv_entry_t **pvepp)
1732 {
1733 PMAP_ASSERT_LOCKED(pmap);
1734 ASSERT_PVH_LOCKED(pai);
1735 PV_ALLOC(pvepp);
1736 if (PV_ENTRY_NULL == *pvepp) {
1737
1738 if (kernel_pmap == pmap) {
1739
1740 PV_KERN_ALLOC(pvepp);
1741
1742 if (PV_ENTRY_NULL == *pvepp) {
1743 pv_entry_t *pv_e;
1744 pv_entry_t *pv_eh;
1745 pv_entry_t *pv_et;
1746 int pv_cnt;
1747 unsigned j;
1748 pmap_paddr_t pa;
1749 kern_return_t ret;
1750
1751 UNLOCK_PVH(pai);
1752 PMAP_UNLOCK(pmap);
1753
1754 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
1755
1756 if (ret == KERN_RESOURCE_SHORTAGE) {
1757 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
1758 }
1759
1760 if (ret != KERN_SUCCESS) {
1761 panic("%s: failed to alloc page for kernel, ret=%d, "
1762 "pmap=%p, pai=%u, pvepp=%p",
1763 __FUNCTION__, ret,
1764 pmap, pai, pvepp);
1765 }
1766
1767 pv_page_count++;
1768
1769 pv_e = (pv_entry_t *)phystokv(pa);
1770 pv_cnt = 0;
1771 pv_eh = pv_et = PV_ENTRY_NULL;
1772 *pvepp = pv_e;
1773 pv_e++;
1774
1775 for (j = 1; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
1776 pv_e->pve_next = pv_eh;
1777 pv_eh = pv_e;
1778
1779 if (pv_et == PV_ENTRY_NULL)
1780 pv_et = pv_e;
1781 pv_cnt++;
1782 pv_e++;
1783 }
1784 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
1785 PMAP_LOCK(pmap);
1786 LOCK_PVH(pai);
1787 return FALSE;
1788 }
1789 } else {
1790 UNLOCK_PVH(pai);
1791 PMAP_UNLOCK(pmap);
1792 pmap_pv_throttle(pmap);
1793 {
1794 pv_entry_t *pv_e;
1795 pv_entry_t *pv_eh;
1796 pv_entry_t *pv_et;
1797 int pv_cnt;
1798 unsigned j;
1799 pmap_paddr_t pa;
1800 kern_return_t ret;
1801
1802 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
1803
1804 if (ret != KERN_SUCCESS) {
1805 panic("%s: failed to alloc page, ret=%d, "
1806 "pmap=%p, pai=%u, pvepp=%p",
1807 __FUNCTION__, ret,
1808 pmap, pai, pvepp);
1809 }
1810
1811 pv_page_count++;
1812
1813 pv_e = (pv_entry_t *)phystokv(pa);
1814 pv_cnt = 0;
1815 pv_eh = pv_et = PV_ENTRY_NULL;
1816 *pvepp = pv_e;
1817 pv_e++;
1818
1819 for (j = 1; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
1820 pv_e->pve_next = pv_eh;
1821 pv_eh = pv_e;
1822
1823 if (pv_et == PV_ENTRY_NULL)
1824 pv_et = pv_e;
1825 pv_cnt++;
1826 pv_e++;
1827 }
1828 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
1829 }
1830 PMAP_LOCK(pmap);
1831 LOCK_PVH(pai);
1832 return FALSE;
1833 }
1834 }
1835 assert(PV_ENTRY_NULL != *pvepp);
1836 return TRUE;
1837 }
1838
1839 static void
1840 pv_free(
1841 pv_entry_t *pvep)
1842 {
1843 PV_FREE_LIST(pvep, pvep, 1);
1844 }
1845
1846 static void
1847 pv_list_free(
1848 pv_entry_t *pvehp,
1849 pv_entry_t *pvetp,
1850 unsigned int cnt)
1851 {
1852 PV_FREE_LIST(pvehp, pvetp, cnt);
1853 }
1854
1855
1856
1857 static inline void PV_ALLOC(pv_entry_t **pv_ep) {
1858 assert(*pv_ep == PV_ENTRY_NULL);
1859 simple_lock(&pv_free_list_lock);
1860 /*
1861 * If the kernel reserved pool is low, let non-kernel mappings allocate
1862 * synchronously, possibly subject to a throttle.
1863 */
1864 if ((pv_kern_free_count >= pv_kern_low_water_mark) && ((*pv_ep = pv_free_list) != 0)) {
1865 pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
1866 (*pv_ep)->pve_next = PV_ENTRY_NULL;
1867 pv_free_count--;
1868 }
1869
1870 simple_unlock(&pv_free_list_lock);
1871
1872 if ((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark)) {
1873 if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
1874 thread_wakeup(&mapping_replenish_event);
1875 }
1876 }
1877
1878 static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt) {
1879 simple_lock(&pv_free_list_lock);
1880 pv_et->pve_next = (pv_entry_t *)pv_free_list;
1881 pv_free_list = pv_eh;
1882 pv_free_count += pv_cnt;
1883 simple_unlock(&pv_free_list_lock);
1884 }
1885
1886 static inline void PV_KERN_ALLOC(pv_entry_t **pv_e) {
1887 assert(*pv_e == PV_ENTRY_NULL);
1888 simple_lock(&pv_kern_free_list_lock);
1889
1890 if ((*pv_e = pv_kern_free_list) != 0) {
1891 pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
1892 (*pv_e)->pve_next = PV_ENTRY_NULL;
1893 pv_kern_free_count--;
1894 pmap_kern_reserve_alloc_stat++;
1895 }
1896
1897 simple_unlock(&pv_kern_free_list_lock);
1898
1899 if (pv_kern_free_count < pv_kern_low_water_mark) {
1900 if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse)) {
1901 thread_wakeup(&mapping_replenish_event);
1902 }
1903 }
1904 }
1905
1906 static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt) {
1907 simple_lock(&pv_kern_free_list_lock);
1908 pv_et->pve_next = pv_kern_free_list;
1909 pv_kern_free_list = pv_eh;
1910 pv_kern_free_count += pv_cnt;
1911 simple_unlock(&pv_kern_free_list_lock);
1912 }
1913
1914 static inline void pmap_pv_throttle(__unused pmap_t p) {
1915 assert(p != kernel_pmap);
1916 /* Apply throttle on non-kernel mappings */
1917 if (pv_kern_free_count < (pv_kern_low_water_mark / 2)) {
1918 pmap_pv_throttle_stat++;
1919 /* This doesn't need to be strictly accurate, merely a hint
1920 * to eliminate the timeout when the reserve is replenished.
1921 */
1922 pmap_pv_throttled_waiters++;
1923 assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
1924 thread_block(THREAD_CONTINUE_NULL);
1925 }
1926 }
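/*
 * Summary of the PV-entry allocation scheme above: two free lists are
 * maintained, a general list (pv_free_list) and a kernel-reserved list
 * (pv_kern_free_list).  PV_ALLOC() only hands out general-list entries while
 * the kernel reserve is at or above pv_kern_low_water_mark, so kernel
 * mappings always have a backstop.  Whenever either list drops below its
 * low-water mark, the mapping_replenish thread is woken to refill it, and
 * user mappings are additionally throttled (a 1 ms timed wait) whenever the
 * kernel reserve falls below half of its low-water mark.
 */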
1927
1928 /*
1929 * Creates a target number of free pv_entry_t objects for the kernel free list
1930 * and the general free list.
1931 */
1932 static kern_return_t
1933 mapping_free_prime_internal(void)
1934 {
1935 unsigned j;
1936 pmap_paddr_t pa;
1937 kern_return_t ret;
1938 pv_entry_t *pv_e;
1939 pv_entry_t *pv_eh;
1940 pv_entry_t *pv_et;
1941 int pv_cnt;
1942 int alloc_options = 0;
1943 int needed_pv_cnt = 0;
1944 int target_pv_free_cnt = 0;
1945
1946 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
1947 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;
1948
1949 if (mapping_free_prime_internal_done) {
1950 return KERN_FAILURE;
1951 }
1952
1953 if (!mapping_free_prime_internal_called) {
1954 mapping_free_prime_internal_called = TRUE;
1955
1956 pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;
1957
1958 /* Alterable via sysctl */
1959 pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;
1960
1961 pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
1962 pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
1963 }
1964
1965 pv_cnt = 0;
1966 pv_eh = pv_et = PV_ENTRY_NULL;
1967 target_pv_free_cnt = PV_ALLOC_INITIAL_TARGET;
1968
1969 /*
1970 * We don't take the lock to read pv_free_count, as we should not be
1971 * invoking this from a multithreaded context.
1972 */
1973 needed_pv_cnt = target_pv_free_cnt - pv_free_count;
1974
1975 if (needed_pv_cnt > target_pv_free_cnt) {
1976 needed_pv_cnt = 0;
1977 }
1978
1979 while (pv_cnt < needed_pv_cnt) {
1980 ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
1981
1982 assert(ret == KERN_SUCCESS);
1983
1984 pv_page_count++;
1985
1986 pv_e = (pv_entry_t *)phystokv(pa);
1987
1988 for (j = 0; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
1989 pv_e->pve_next = pv_eh;
1990 pv_eh = pv_e;
1991
1992 if (pv_et == PV_ENTRY_NULL)
1993 pv_et = pv_e;
1994 pv_cnt++;
1995 pv_e++;
1996 }
1997 }
1998
1999 if (pv_cnt) {
2000 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2001 }
2002
2003 pv_cnt = 0;
2004 pv_eh = pv_et = PV_ENTRY_NULL;
2005 target_pv_free_cnt = PV_KERN_ALLOC_INITIAL_TARGET;
2006
2007 /*
2008 * We don't take the lock to read pv_kern_free_count, as we should not
2009 * be invoking this from a multithreaded context.
2010 */
2011 needed_pv_cnt = target_pv_free_cnt - pv_kern_free_count;
2012
2013 if (needed_pv_cnt > target_pv_free_cnt) {
2014 needed_pv_cnt = 0;
2015 }
2016
2017 while (pv_cnt < needed_pv_cnt) {
2018
2019 ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
2020
2021 assert(ret == KERN_SUCCESS);
2022 pv_page_count++;
2023
2024 pv_e = (pv_entry_t *)phystokv(pa);
2025
2026 for (j = 0; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
2027 pv_e->pve_next = pv_eh;
2028 pv_eh = pv_e;
2029
2030 if (pv_et == PV_ENTRY_NULL)
2031 pv_et = pv_e;
2032 pv_cnt++;
2033 pv_e++;
2034 }
2035 }
2036
2037 if (pv_cnt) {
2038 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2039 }
2040
2041 mapping_free_prime_internal_done = TRUE;
2042 return KERN_SUCCESS;
2043 }
2044
2045 void
2046 mapping_free_prime(void)
2047 {
2048 kern_return_t kr = KERN_FAILURE;
2049
2050 kr = mapping_free_prime_internal();
2051
2052 if (kr != KERN_SUCCESS) {
2053 panic("%s: failed, kr=%d", __FUNCTION__, kr);
2054 }
2055 }
2056
2057 void mapping_replenish(void);
2058
2059 void mapping_adjust(void) {
2060 kern_return_t mres;
2061
2062 mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
2063 if (mres != KERN_SUCCESS) {
2064 panic("pmap: mapping_replenish thread creation failed");
2065 }
2066 thread_deallocate(mapping_replenish_thread);
2067 }
2068
2069 /*
2070 * Fills the kernel and general PV free lists back up to their low watermarks.
2071 */
2072 static kern_return_t
2073 mapping_replenish_internal(void)
2074 {
2075 pv_entry_t *pv_e;
2076 pv_entry_t *pv_eh;
2077 pv_entry_t *pv_et;
2078 int pv_cnt;
2079 unsigned j;
2080 pmap_paddr_t pa;
2081 kern_return_t ret = KERN_SUCCESS;
2082
2083 while (pv_kern_free_count < pv_kern_low_water_mark) {
2084 pv_cnt = 0;
2085 pv_eh = pv_et = PV_ENTRY_NULL;
2086
2087 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2088 assert(ret == KERN_SUCCESS);
2089
2090 pv_page_count++;
2091
2092 pv_e = (pv_entry_t *)phystokv(pa);
2093
2094 for (j = 0; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
2095 pv_e->pve_next = pv_eh;
2096 pv_eh = pv_e;
2097
2098 if (pv_et == PV_ENTRY_NULL)
2099 pv_et = pv_e;
2100 pv_cnt++;
2101 pv_e++;
2102 }
2103 pmap_kernel_reserve_replenish_stat += pv_cnt;
2104 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2105 }
2106
2107 while (pv_free_count < pv_low_water_mark) {
2108 pv_cnt = 0;
2109 pv_eh = pv_et = PV_ENTRY_NULL;
2110
2111 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2112 assert(ret == KERN_SUCCESS);
2113
2114 pv_page_count++;
2115
2116 pv_e = (pv_entry_t *)phystokv(pa);
2117
2118 for (j = 0; j < (PAGE_SIZE/sizeof(pv_entry_t)) ; j++) {
2119 pv_e->pve_next = pv_eh;
2120 pv_eh = pv_e;
2121
2122 if (pv_et == PV_ENTRY_NULL)
2123 pv_et = pv_e;
2124 pv_cnt++;
2125 pv_e++;
2126 }
2127 pmap_user_reserve_replenish_stat += pv_cnt;
2128 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2129 }
2130
2131 return ret;
2132 }
2133
2134 /*
2135 * Continuation function that keeps the PV free lists from running out of free
2136 * elements.
2137 */
2138 __attribute__((noreturn))
2139 void
2140 mapping_replenish(void)
2141 {
2142 kern_return_t kr;
2143
2144 /* We qualify for VM privileges...*/
2145 current_thread()->options |= TH_OPT_VMPRIV;
2146
2147 for (;;) {
2148 kr = mapping_replenish_internal();
2149
2150 if (kr != KERN_SUCCESS) {
2151 panic("%s: failed, kr=%d", __FUNCTION__, kr);
2152 }
2153
2154 /*
2155 * Wake threads throttled while the kernel reserve was being replenished.
2156 */
2157 if (pmap_pv_throttled_waiters) {
2158 pmap_pv_throttled_waiters = 0;
2159 thread_wakeup(&pmap_user_pv_throttle_event);
2160 }
2161
2162 /* Check if the kernel pool has been depleted since the
2163 * first pass, to reduce refill latency.
2164 */
2165 if (pv_kern_free_count < pv_kern_low_water_mark)
2166 continue;
2167 /* Block sans continuation to avoid yielding kernel stack */
2168 assert_wait(&mapping_replenish_event, THREAD_UNINT);
2169 mappingrecurse = 0;
2170 thread_block(THREAD_CONTINUE_NULL);
2171 pmap_mapping_thread_wakeups++;
2172 }
2173 }
2174
2175
2176 static void
2177 ptd_bootstrap(
2178 pt_desc_t *ptdp,
2179 unsigned int ptd_cnt)
2180 {
2181 simple_lock_init(&ptd_free_list_lock, 0);
2182 while (ptd_cnt != 0) {
2183 (*(void **)ptdp) = (void *)ptd_free_list;
2184 ptd_free_list = ptdp;
2185 ptdp++;
2186 ptd_cnt--;
2187 ptd_free_count++;
2188 }
2189 ptd_preboot = FALSE;
2190 }
2191
2192 static pt_desc_t
2193 *ptd_alloc(
2194 pmap_t pmap)
2195 {
2196 pt_desc_t *ptdp;
2197 unsigned i;
2198
2199 if (!ptd_preboot)
2200 simple_lock(&ptd_free_list_lock);
2201
2202 if (ptd_free_count == 0) {
2203 unsigned int ptd_cnt;
2204 pt_desc_t *ptdp_next;
2205
2206 if (ptd_preboot) {
2207 ptdp = (pt_desc_t *)avail_start;
2208 avail_start += ARM_PGBYTES;
2209 ptdp_next = ptdp;
2210 ptd_cnt = ARM_PGBYTES/sizeof(pt_desc_t);
2211 } else {
2212 pmap_paddr_t pa;
2213 kern_return_t ret;
2214
2215 simple_unlock(&ptd_free_list_lock);
2216
2217 if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
2218 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
2219 assert(ret == KERN_SUCCESS);
2220 }
2221 ptdp = (pt_desc_t *)phystokv(pa);
2222
2223 simple_lock(&ptd_free_list_lock);
2224 ptdp_next = ptdp;
2225 ptd_cnt = PAGE_SIZE/sizeof(pt_desc_t);
2226 }
2227
2228 while (ptd_cnt != 0) {
2229 (*(void **)ptdp_next) = (void *)ptd_free_list;
2230 ptd_free_list = ptdp_next;
2231 ptdp_next++;
2232 ptd_cnt--;
2233 ptd_free_count++;
2234 }
2235 }
2236
2237 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
2238 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
2239 ptd_free_count--;
2240 } else {
2241 panic("out of ptd entry\n");
2242 }
2243
2244 if (!ptd_preboot)
2245 simple_unlock(&ptd_free_list_lock);
2246
2247 ptdp->pt_page.next = NULL;
2248 ptdp->pt_page.prev = NULL;
2249 ptdp->pmap = pmap;
2250
2251 for (i = 0 ; i < PT_INDEX_MAX ; i++) {
2252 ptdp->pt_map[i].va = 0;
2253 ptdp->pt_cnt[i].refcnt = 0;
2254 ptdp->pt_cnt[i].wiredcnt = 0;
2255 }
2256 simple_lock(&pt_pages_lock);
2257 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2258 simple_unlock(&pt_pages_lock);
2259
2260 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
2261
2262 return(ptdp);
2263 }
2264
2265 static void
2266 ptd_deallocate(
2267 pt_desc_t *ptdp)
2268 {
2269 unsigned i;
2270 pmap_t pmap = ptdp->pmap;
2271
2272 if (ptd_preboot) {
2273 panic("ptd_deallocate(): early boot\n");
2274 }
2275 for (i = 0 ; i < PT_INDEX_MAX ; i++) {
2276 if (ptdp->pt_cnt[i].refcnt != 0)
2277 panic("ptd_deallocate(): ptdp=%p refcnt=0x%x \n", ptdp, ptdp->pt_cnt[i].refcnt);
2278 }
2279
2280 if (ptdp->pt_page.next != NULL) {
2281 simple_lock(&pt_pages_lock);
2282 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2283 simple_unlock(&pt_pages_lock);
2284 }
2285 simple_lock(&ptd_free_list_lock);
2286 (*(void **)ptdp) = (void *)ptd_free_list;
2287 ptd_free_list = (pt_desc_t *)ptdp;
2288 ptd_free_count++;
2289 simple_unlock(&ptd_free_list_lock);
2290 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
2291 }
2292
2293 static void
2294 ptd_init(
2295 pt_desc_t *ptdp,
2296 pmap_t pmap,
2297 vm_map_address_t va,
2298 unsigned int level,
2299 pt_entry_t *pte_p)
2300 {
2301 if (ptdp->pmap != pmap)
2302 panic("ptd_init(): pmap mismatch\n");
2303
2304 #if (__ARM_VMSA__ == 7)
2305 assert(level == 2);
2306 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
2307 #else
2308 if (level == 3) {
2309 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L2_OFFMASK ;
2310 } else if (level == 2)
2311 ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L1_OFFMASK ;
2312 #endif
2313 if (level < PMAP_TT_MAX_LEVEL)
2314 ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
2315
2316 }
2317
2318
2319 boolean_t
2320 pmap_valid_address(
2321 pmap_paddr_t addr)
2322 {
2323 return pa_valid(addr);
2324 }
2325
2326 #if (__ARM_VMSA__ == 7)
2327
2328 /*
2329 * Given an offset and a map, compute the address of the
2330 * corresponding translation table entry.
2331 */
2332 static inline tt_entry_t *
2333 pmap_tte(pmap_t pmap,
2334 vm_map_address_t addr)
2335 {
2336 if (!(tte_index(pmap, addr) < pmap->tte_index_max))
2337 return (tt_entry_t *)NULL;
2338 return (&pmap->tte[tte_index(pmap, addr)]);
2339 }
2340
2341
2342 /*
2343 * Given an offset and a map, compute the address of the
2344 * pte. If the address is invalid with respect to the map
2345 * then PT_ENTRY_NULL is returned (and the map may need to grow).
2346 *
2347 * This is only used internally.
2348 */
2349 static inline pt_entry_t *
2350 pmap_pte(
2351 pmap_t pmap,
2352 vm_map_address_t addr)
2353 {
2354 pt_entry_t *ptp;
2355 tt_entry_t *ttp;
2356 tt_entry_t tte;
2357
2358 ttp = pmap_tte(pmap, addr);
2359 if (ttp == (tt_entry_t *)NULL)
2360 return (PT_ENTRY_NULL);
2361 tte = *ttp;
2362 #if MACH_ASSERT
2363 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK)
2364 panic("Attempt to demote L1 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2365 #endif
2366 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)
2367 return (PT_ENTRY_NULL);
2368 ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
2369 return (ptp);
2370 }
2371
2372 #else
2373
2374 /*
2375 * Given an offset and a map, compute the address of the level 1 translation table entry.
2376 * If the translation is invalid then PT_ENTRY_NULL is returned.
2377 */
2378 static inline tt_entry_t *
2379 pmap_tt1e(pmap_t pmap,
2380 vm_map_address_t addr)
2381 {
2382 #if __ARM64_TWO_LEVEL_PMAP__
2383 #pragma unused(pmap, addr)
2384 panic("pmap_tt1e called on a two level pmap");
2385 return (NULL);
2386 #else
2387 return (&pmap->tte[tt1_index(pmap, addr)]);
2388 #endif
2389 }
2390
2391 /*
2392 * Given an offset and a map, compute the address of the level 2 translation table entry.
2393 * If the translation is invalid then PT_ENTRY_NULL is returned.
2394 */
2395 static inline tt_entry_t *
2396 pmap_tt2e(pmap_t pmap,
2397 vm_map_address_t addr)
2398 {
2399 #if __ARM64_TWO_LEVEL_PMAP__
2400 return (&pmap->tte[tt2_index(pmap, addr)]);
2401 #else
2402 tt_entry_t *ttp;
2403 tt_entry_t tte;
2404
2405 ttp = pmap_tt1e(pmap, addr);
2406 tte = *ttp;
2407 #if MACH_ASSERT
2408 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID))
2409 panic("Attempt to demote L1 block (?!): pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2410 #endif
2411 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID))
2412 return (PT_ENTRY_NULL);
2413
2414 ttp = &((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, addr)];
2415 return ((tt_entry_t *)ttp);
2416 #endif
2417 }
2418
2419
2420 /*
2421 * Given an offset and a map, compute the address of the level 3 translation table entry.
2422 * If the translation is invalid then PT_ENTRY_NULL is returned.
2423 */
2424 static inline pt_entry_t *
2425 pmap_tt3e(
2426 pmap_t pmap,
2427 vm_map_address_t addr)
2428 {
2429 pt_entry_t *ptp;
2430 tt_entry_t *ttp;
2431 tt_entry_t tte;
2432
2433 /* Level 0 currently unused */
2434 #if __ARM64_TWO_LEVEL_PMAP__
2435 ttp = pmap_tt2e(pmap, addr);
2436 tte = *ttp;
2437 #else
2438 /* Get first-level (1GB) entry */
2439 ttp = pmap_tt1e(pmap, addr);
2440 tte = *ttp;
2441 #if MACH_ASSERT
2442 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID))
2443 panic("Attempt to demote L1 block (?!): pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2444 #endif
2445 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID))
2446 return (PT_ENTRY_NULL);
2447
2448 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, addr)];
2449 #endif
2450 #if MACH_ASSERT
2451 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID))
2452 panic("Attempt to demote L2 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
2453 #endif
2454 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
2455 return (PT_ENTRY_NULL);
2456 }
2457
2458 /* Get third-level (4KB) entry */
2459 ptp = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, addr)]);
2460 return (ptp);
2461 }
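/*
 * The walk above descends from L1 (or L2 in the two-level configuration)
 * down to the L3 page table: at each level the entry must be a valid table
 * descriptor, otherwise PT_ENTRY_NULL is returned; encountering a block
 * mapping at L1 or L2 is treated as a fatal error under MACH_ASSERT.
 */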
2462
2463
2464 static inline tt_entry_t *
2465 pmap_tte(
2466 pmap_t pmap,
2467 vm_map_address_t addr)
2468 {
2469 return(pmap_tt2e(pmap, addr));
2470 }
2471
2472
2473 static inline pt_entry_t *
2474 pmap_pte(
2475 pmap_t pmap,
2476 vm_map_address_t addr)
2477 {
2478 return(pmap_tt3e(pmap, addr));
2479 }
2480
2481 #endif
2482
2483
2484 /*
2485 * Map memory at initialization. The physical addresses being
2486 * mapped are not managed and are never unmapped.
2487 *
2488 * For now, the VM is already on; we only need to map the
2489 * specified memory.
2490 */
2491 vm_map_address_t
2492 pmap_map(
2493 vm_map_address_t virt,
2494 vm_offset_t start,
2495 vm_offset_t end,
2496 vm_prot_t prot,
2497 unsigned int flags)
2498 {
2499 kern_return_t kr;
2500 vm_size_t ps;
2501
2502 ps = PAGE_SIZE;
2503 while (start < end) {
2504 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
2505 prot, VM_PROT_NONE, flags, FALSE);
2506
2507 if (kr != KERN_SUCCESS) {
2508 panic("%s: failed pmap_enter, "
2509 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
2510 __FUNCTION__,
2511 (void *) virt, (void *) start, (void *) end, prot, flags);
2512 }
2513
2514 virt += ps;
2515 start += ps;
2516 }
2517 return (virt);
2518 }
2519
2520 vm_map_address_t
2521 pmap_map_bd_with_options(
2522 vm_map_address_t virt,
2523 vm_offset_t start,
2524 vm_offset_t end,
2525 vm_prot_t prot,
2526 int32_t options)
2527 {
2528 pt_entry_t tmplate;
2529 pt_entry_t *ptep;
2530 vm_map_address_t vaddr;
2531 vm_offset_t paddr;
2532 pt_entry_t mem_attr;
2533
2534 switch (options & PMAP_MAP_BD_MASK) {
2535 case PMAP_MAP_BD_WCOMB:
2536 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
2537 #if (__ARM_VMSA__ > 7)
2538 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
2539 #else
2540 mem_attr |= ARM_PTE_SH;
2541 #endif
2542 break;
2543 case PMAP_MAP_BD_POSTED:
2544 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
2545 break;
2546 default:
2547 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
2548 break;
2549 }
2550
2551 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
2552 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
2553 #if __ARM_KERNEL_PROTECT__
2554 tmplate |= ARM_PTE_NG;
2555 #endif /* __ARM_KERNEL_PROTECT__ */
2556
2557 vaddr = virt;
2558 paddr = start;
2559 while (paddr < end) {
2560
2561 ptep = pmap_pte(kernel_pmap, vaddr);
2562 if (ptep == PT_ENTRY_NULL) {
2563 panic("pmap_map_bd");
2564 }
2565 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2566 WRITE_PTE(ptep, tmplate);
2567
2568 pte_increment_pa(tmplate);
2569 vaddr += PAGE_SIZE;
2570 paddr += PAGE_SIZE;
2571 }
2572
2573 if (end >= start)
2574 flush_mmu_tlb_region(virt, (unsigned)(end - start));
2575
2576 return (vaddr);
2577 }
2578
2579 /*
2580 * Back-door routine for mapping kernel VM at initialization.
2581 * Useful for mapping memory outside the range
2582 * [vm_first_phys, vm_last_phys] (i.e., devices).
2583 * Otherwise like pmap_map.
2584 */
2585 vm_map_address_t
2586 pmap_map_bd(
2587 vm_map_address_t virt,
2588 vm_offset_t start,
2589 vm_offset_t end,
2590 vm_prot_t prot)
2591 {
2592 pt_entry_t tmplate;
2593 pt_entry_t *ptep;
2594 vm_map_address_t vaddr;
2595 vm_offset_t paddr;
2596
2597 /* not cacheable and not buffered */
2598 tmplate = pa_to_pte(start)
2599 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
2600 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
2601 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
2602 #if __ARM_KERNEL_PROTECT__
2603 tmplate |= ARM_PTE_NG;
2604 #endif /* __ARM_KERNEL_PROTECT__ */
2605
2606 vaddr = virt;
2607 paddr = start;
2608 while (paddr < end) {
2609
2610 ptep = pmap_pte(kernel_pmap, vaddr);
2611 if (ptep == PT_ENTRY_NULL) {
2612 panic("pmap_map_bd");
2613 }
2614 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2615 WRITE_PTE(ptep, tmplate);
2616
2617 pte_increment_pa(tmplate);
2618 vaddr += PAGE_SIZE;
2619 paddr += PAGE_SIZE;
2620 }
2621
2622 if (end >= start)
2623 flush_mmu_tlb_region(virt, (unsigned)(end - start));
2624
2625 return (vaddr);
2626 }
2627
2628 /*
2629 * Back-door routine for mapping kernel VM at initialization.
2630 * Useful for mapping specific physical addresses in early
2631 * boot (i.e., before kernel_map is initialized).
2632 *
2633 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
2634 */
2635
2636 vm_map_address_t
2637 pmap_map_high_window_bd(
2638 vm_offset_t pa_start,
2639 vm_size_t len,
2640 vm_prot_t prot)
2641 {
2642 pt_entry_t *ptep, pte;
2643 #if (__ARM_VMSA__ == 7)
2644 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
2645 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
2646 #else
2647 vm_map_address_t va_start = VREGION1_START;
2648 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
2649 #endif
2650 vm_map_address_t va_end;
2651 vm_map_address_t va;
2652 vm_size_t offset;
2653
2654 offset = pa_start & PAGE_MASK;
2655 pa_start -= offset;
2656 len += offset;
2657
2658 if (len > (va_max - va_start)) {
2659 panic("pmap_map_high_window_bd: area too large\n");
2660 }
2661
2662 scan:
2663 for ( ; va_start < va_max; va_start += PAGE_SIZE) {
2664 ptep = pmap_pte(kernel_pmap, va_start);
2665 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2666 if (*ptep == ARM_PTE_TYPE_FAULT)
2667 break;
2668 }
2669 if (va_start > va_max) {
2670 panic("pmap_map_high_window_bd: insufficient pages\n");
2671 }
2672
2673 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
2674 ptep = pmap_pte(kernel_pmap, va_end);
2675 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
2676 if (*ptep != ARM_PTE_TYPE_FAULT) {
2677 va_start = va_end + PAGE_SIZE;
2678 goto scan;
2679 }
2680 }
2681
2682 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
2683 ptep = pmap_pte(kernel_pmap, va);
2684 pte = pa_to_pte(pa_start)
2685 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
2686 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
2687 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
2688 #if (__ARM_VMSA__ > 7)
2689 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
2690 #else
2691 pte |= ARM_PTE_SH;
2692 #endif
2693 #if __ARM_KERNEL_PROTECT__
2694 pte |= ARM_PTE_NG;
2695 #endif /* __ARM_KERNEL_PROTECT__ */
2696 WRITE_PTE(ptep, pte);
2697 }
2698 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len);
2699 #if KASAN
2700 kasan_notify_address(va_start, len);
2701 #endif
2702 return va_start;
2703 }
2704
2705 #define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
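/*
 * PMAP_ALIGN() rounds addr up to the next multiple of align, where align
 * must be a power of two; '+' binds tighter than '&', so the expression is
 * equivalent to ((addr + (align - 1)) & ~(align - 1)).  For example,
 * PMAP_ALIGN(0x1003, 0x40) == 0x1040.
 */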
2706
2707 typedef struct pmap_io_range
2708 {
2709 uint64_t addr;
2710 uint32_t len;
2711 uint32_t wimg;
2712 } __attribute__((packed)) pmap_io_range_t;
2713
2714 static unsigned int
2715 pmap_compute_io_rgns(void)
2716 {
2717 DTEntry entry;
2718 pmap_io_range_t *ranges;
2719 void *prop = NULL;
2720 int err;
2721 unsigned int prop_size;
2722
2723 err = DTLookupEntry(NULL, "/defaults", &entry);
2724 assert(err == kSuccess);
2725
2726 if (kSuccess != DTGetProperty(entry, "pmap-io-granule", &prop, &prop_size))
2727 return 0;
2728
2729 io_rgn_granule = *((uint32_t*)prop);
2730
2731 if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size))
2732 return 0;
2733
2734 if ((io_rgn_granule == 0) || (io_rgn_granule & PAGE_MASK))
2735 panic("pmap I/O region granularity is not page-aligned!\n");
2736
2737 ranges = prop;
2738 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
2739 if ((i == 0) || (ranges[i].addr < io_rgn_start))
2740 io_rgn_start = ranges[i].addr;
2741 if ((i == 0) || ((ranges[i].addr + ranges[i].len) > io_rgn_end))
2742 io_rgn_end = ranges[i].addr + ranges[i].len;
2743 }
2744
2745 if (io_rgn_start & PAGE_MASK)
2746 panic("pmap I/O region start is not page-aligned!\n");
2747
2748 if (io_rgn_end & PAGE_MASK)
2749 panic("pmap I/O region end is not page-aligned!\n");
2750
2751 if (((io_rgn_start < gPhysBase) && (io_rgn_end >= gPhysBase)) ||
2752 ((io_rgn_start < avail_end) && (io_rgn_end >= avail_end)))
2753 panic("pmap I/O region overlaps physical memory!\n");
2754
2755 return (unsigned int)((io_rgn_end - io_rgn_start) / io_rgn_granule);
2756 }
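/*
 * The value returned above is the number of io_rgn_granule-sized slots
 * needed to cover [io_rgn_start, io_rgn_end); pmap_bootstrap() uses it to
 * size io_attr_table, which pmap_load_io_rgns() then populates.
 */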
2757
2758 static void
2759 pmap_load_io_rgns(void)
2760 {
2761 DTEntry entry;
2762 pmap_io_range_t *ranges;
2763 void *prop = NULL;
2764 int err;
2765 unsigned int prop_size;
2766
2767 if (io_rgn_granule == 0)
2768 return;
2769
2770 err = DTLookupEntry(NULL, "/defaults", &entry);
2771 assert(err == kSuccess);
2772
2773 err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
2774 assert(err == kSuccess);
2775
2776 ranges = prop;
2777 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
2778 if ((ranges[i].addr - io_rgn_start) % io_rgn_granule)
2779 panic("pmap I/O region %d is not aligned to I/O granularity!\n", i);
2780 if (ranges[i].len % io_rgn_granule)
2781 panic("pmap I/O region %d size is not a multiple of I/O granularity!\n", i);
2782 for (uint32_t offs = 0; offs < ranges[i].len; offs += io_rgn_granule) {
2783 io_attr_table[(ranges[i].addr + offs - io_rgn_start) / io_rgn_granule] =
2784 IO_ATTR_WIMG(ranges[i].wimg);
2785 }
2786 }
2787 }
2788
2789
2790 /*
2791 * Bootstrap the system enough to run with virtual memory.
2792 *
2793 * The early VM initialization code has already allocated
2794 * the first CPU's translation table and made entries for
2795 * all the one-to-one mappings to be found there.
2796 *
2797 * We must set up the kernel pmap structures and the
2798 * physical-to-virtual translation lookup tables for the
2799 * physical memory to be managed (between avail_start and
2800 * avail_end).
2801 *
2802 * Map the kernel's code and data, and allocate the system page table.
2803 * Page_size must already be set.
2804 *
2805 * Parameters:
2806 * first_avail first available physical page -
2807 * after kernel page tables
2808 * avail_start PA of first managed physical page
2809 * avail_end PA of last managed physical page
2810 */
2811
2812 void
2813 pmap_bootstrap(
2814 vm_offset_t vstart)
2815 {
2816 pmap_paddr_t pmap_struct_start;
2817 vm_size_t pv_head_size;
2818 vm_size_t pv_lock_table_size;
2819 vm_size_t ptd_root_table_size;
2820 vm_size_t pp_attr_table_size;
2821 vm_size_t io_attr_table_size;
2822 unsigned int niorgns;
2823 unsigned int npages;
2824 unsigned int i;
2825 vm_map_offset_t maxoffset;
2826
2827
2828 #ifdef PMAP_TRACES
2829 if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
2830 kprintf("Kernel traces for pmap operations enabled\n");
2831 }
2832 #endif
2833
2834 /*
2835 * Initialize the kernel pmap.
2836 */
2837 pmap_stamp = 1;
2838 kernel_pmap->tte = cpu_tte;
2839 kernel_pmap->ttep = cpu_ttep;
2840 #if (__ARM_VMSA__ > 7)
2841 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
2842 #else
2843 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
2844 #endif
2845 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
2846 kernel_pmap->wired = 0;
2847 kernel_pmap->ref_count = 1;
2848 kernel_pmap->gc_status = 0;
2849 kernel_pmap->nx_enabled = TRUE;
2850 #ifdef __arm64__
2851 kernel_pmap->is_64bit = TRUE;
2852 #else
2853 kernel_pmap->is_64bit = FALSE;
2854 #endif
2855 kernel_pmap->stamp = hw_atomic_add(&pmap_stamp, 1);
2856
2857 kernel_pmap->nested_region_grand_addr = 0x0ULL;
2858 kernel_pmap->nested_region_subord_addr = 0x0ULL;
2859 kernel_pmap->nested_region_size = 0x0ULL;
2860 kernel_pmap->nested_region_asid_bitmap = NULL;
2861 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
2862
2863 #if (__ARM_VMSA__ == 7)
2864 kernel_pmap->tte_index_max = 4*NTTES;
2865 #else
2866 kernel_pmap->tte_index_max = (ARM_PGBYTES / sizeof(tt_entry_t));
2867 #endif
2868 kernel_pmap->prev_tte = (tt_entry_t *) NULL;
2869 kernel_pmap->cpu_ref = 0;
2870
2871 PMAP_LOCK_INIT(kernel_pmap);
2872 #if (__ARM_VMSA__ == 7)
2873 simple_lock_init(&kernel_pmap->tt1_lock, 0);
2874 #endif
2875 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
2876
2877 /* allocate space for and initialize the bookkeeping structures */
2878 niorgns = pmap_compute_io_rgns();
2879 npages = (unsigned int)atop(mem_size);
2880 pp_attr_table_size = npages * sizeof(pp_attr_t);
2881 io_attr_table_size = niorgns * sizeof(io_attr_t);
2882 pv_lock_table_size = npages;
2883 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
2884 #if (__ARM_VMSA__ == 7)
2885 ptd_root_table_size = sizeof(pt_desc_t) * (1<<((mem_size>>30)+12));
2886 #else
2887 ptd_root_table_size = sizeof(pt_desc_t) * (1<<((mem_size>>30)+13));
2888 #endif
2889
2890 pmap_struct_start = avail_start;
2891
2892 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
2893 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
2894 io_attr_table = (io_attr_t *) phystokv(avail_start);
2895 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size + pv_lock_table_size, __alignof(pv_entry_t*));
2896 pv_head_table = (pv_entry_t **) phystokv(avail_start);
2897 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
2898 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
2899 avail_start = round_page(avail_start + ptd_root_table_size);
2900
2901 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
2902
2903 pmap_load_io_rgns();
2904 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size/sizeof(pt_desc_t)));
2905
2906 pmap_cpu_data_array_init();
2907
2908 vm_first_phys = gPhysBase;
2909 vm_last_phys = trunc_page(avail_end);
2910
2911 simple_lock_init(&pmaps_lock, 0);
2912 queue_init(&map_pmap_list);
2913 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
2914 queue_init(&tt_pmap_list);
2915 tt_pmap_count = 0;
2916 tt_pmap_max = 0;
2917 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
2918 free_page_size_tt_count = 0;
2919 free_page_size_tt_max = 0;
2920 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
2921 free_two_page_size_tt_count = 0;
2922 free_two_page_size_tt_max = 0;
2923 free_tt_list = TT_FREE_ENTRY_NULL;
2924 free_tt_count = 0;
2925 free_tt_max = 0;
2926
2927 simple_lock_init(&pt_pages_lock, 0);
2928 queue_init(&pt_page_list);
2929
2930 simple_lock_init(&pmap_pages_lock, 0);
2931 pmap_pages_request_count = 0;
2932 pmap_pages_request_acum = 0;
2933 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
2934
2935 virtual_space_start = vstart;
2936 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
2937
2938 /* mark all the address spaces in use */
2939 for (i = 0; i < MAX_ASID / (sizeof(uint32_t) * NBBY); i++)
2940 asid_bitmap[i] = 0xffffffff;
2941
2942 /*
2943 * The kernel gets ASID 0, and all aliases of it. This is
2944 * important because ASID 0 is global; if we vend ASID 0
2945 * out to a user pmap, those translations will show up in
2946 * other processes through the TLB.
2947 */
2948 for (i = 0; i < MAX_ASID; i += ARM_MAX_ASID) {
2949 asid_bitmap[i / (sizeof(uint32_t) * NBBY)] &= ~(1 << (i % (sizeof(uint32_t) * NBBY)));
2950
2951 #if __ARM_KERNEL_PROTECT__
2952 assert((i + 1) < MAX_ASID);
2953 asid_bitmap[(i + 1) / (sizeof(uint32_t) * NBBY)] &= ~(1 << ((i + 1) % (sizeof(uint32_t) * NBBY)));
2954 #endif /* __ARM_KERNEL_PROTECT__ */
2955 }
2956
2957 kernel_pmap->asid = 0;
2958 kernel_pmap->vasid = 0;
2959
2960 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof (maxoffset))) {
2961 maxoffset = trunc_page(maxoffset);
2962 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
2963 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
2964 arm_pmap_max_offset_default = maxoffset;
2965 }
2966 }
2967 #if defined(__arm64__)
2968 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof (maxoffset))) {
2969 maxoffset = trunc_page(maxoffset);
2970 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
2971 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
2972 arm64_pmap_max_offset_default = maxoffset;
2973 }
2974 }
2975 #endif
2976
2977 #if DEVELOPMENT || DEBUG
2978 PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof (panic_on_unsigned_execute));
2979 #endif /* DEVELOPMENT || DEBUG */
2980
2981 pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
2982 pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;
2983
2984 simple_lock_init(&phys_backup_lock, 0);
2985
2986 #if MACH_ASSERT
2987 PE_parse_boot_argn("pmap_stats_assert",
2988 &pmap_stats_assert,
2989 sizeof (pmap_stats_assert));
2990 #endif /* MACH_ASSERT */
2991
2992 #if KASAN
2993 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
2994 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
2995 #endif /* KASAN */
2996 }
2997
2998
2999 void
3000 pmap_virtual_space(
3001 vm_offset_t *startp,
3002 vm_offset_t *endp
3003 )
3004 {
3005 *startp = virtual_space_start;
3006 *endp = virtual_space_end;
3007 }
3008
3009
3010 boolean_t
3011 pmap_virtual_region(
3012 unsigned int region_select,
3013 vm_map_offset_t *startp,
3014 vm_map_size_t *size
3015 )
3016 {
3017 boolean_t ret = FALSE;
3018 #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
3019 if (region_select == 0) {
3020 /*
3021 * In this config, the bootstrap mappings should occupy their own L2
3022 * TTs, as they should be immutable after boot. Having the associated
3023 * TTEs and PTEs in their own pages allows us to lock down those pages,
3024 * while allowing the rest of the kernel address range to be remapped.
3025 */
3026 #if (__ARM_VMSA__ > 7)
3027 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
3028 #else
3029 #error Unsupported configuration
3030 #endif
3031 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
3032 ret = TRUE;
3033 }
3034 #else
3035 #if (__ARM_VMSA__ > 7)
3036 unsigned long low_global_vr_mask = 0;
3037 vm_map_size_t low_global_vr_size = 0;
3038 #endif
3039
3040 if (region_select == 0) {
3041 #if (__ARM_VMSA__ == 7)
3042 *startp = gVirtBase & 0xFFC00000;
3043 *size = ((virtual_space_start-(gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
3044 #else
3045 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
3046 if (!TEST_PAGE_SIZE_4K) {
3047 *startp = gVirtBase & 0xFFFFFFFFFE000000;
3048 *size = ((virtual_space_start-(gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
3049 } else {
3050 *startp = gVirtBase & 0xFFFFFFFFFF800000;
3051 *size = ((virtual_space_start-(gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
3052 }
3053 #endif
3054 ret = TRUE;
3055 }
3056 if (region_select == 1) {
3057 *startp = VREGION1_START;
3058 *size = VREGION1_SIZE;
3059 ret = TRUE;
3060 }
3061 #if (__ARM_VMSA__ > 7)
3062 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
3063 if (!TEST_PAGE_SIZE_4K) {
3064 low_global_vr_mask = 0xFFFFFFFFFE000000;
3065 low_global_vr_size = 0x2000000;
3066 } else {
3067 low_global_vr_mask = 0xFFFFFFFFFF800000;
3068 low_global_vr_size = 0x800000;
3069 }
3070
3071 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
3072 *startp = LOW_GLOBAL_BASE_ADDRESS;
3073 *size = low_global_vr_size;
3074 ret = TRUE;
3075 }
3076
3077 if (region_select == 3) {
3078 /* In this config, we allow the bootstrap mappings to occupy the same
3079 * page table pages as the heap.
3080 */
3081 *startp = VM_MIN_KERNEL_ADDRESS;
3082 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
3083 ret = TRUE;
3084 }
3085 #endif
3086 #endif
3087 return ret;
3088 }
3089
3090 unsigned int
3091 pmap_free_pages(
3092 void)
3093 {
3094 return (unsigned int)atop(avail_end - first_avail);
3095 }
3096
3097
3098 boolean_t
3099 pmap_next_page_hi(
3100 ppnum_t * pnum)
3101 {
3102 return pmap_next_page(pnum);
3103 }
3104
3105
3106 boolean_t
3107 pmap_next_page(
3108 ppnum_t *pnum)
3109 {
3110 if (first_avail != avail_end) {
3111 *pnum = (ppnum_t)atop(first_avail);
3112 first_avail += PAGE_SIZE;
3113 return TRUE;
3114 }
3115 return FALSE;
3116 }
3117
3118
3119 /*
3120 * Initialize the pmap module.
3121 * Called by vm_init, to initialize any structures that the pmap
3122 * system needs to map virtual memory.
3123 */
3124 void
3125 pmap_init(
3126 void)
3127 {
3128 /*
3129 * Protect page zero in the kernel map.
3130 * (can be overruled by permanent translation
3131 * table entries at page zero - see arm_vm_init).
3132 */
3133 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
3134
3135 pmap_initialized = TRUE;
3136
3137 pmap_zone_init();
3138
3139
3140 /*
3141 * Initialize the pmap object (for tracking the vm_page_t
3142 * structures for pages we allocate to be page tables in
3143 * pmap_expand()).
3144 */
3145 _vm_object_allocate(mem_size, pmap_object);
3146 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3147
3148 pv_init();
3149
3150 /*
3151 * The value of hard_maxproc may have been scaled; make sure
3152 * it is still less than the value of MAX_ASID.
3153 */
3154 assert(hard_maxproc < MAX_ASID);
3155
3156 #if CONFIG_PGTRACE
3157 pmap_pgtrace_init();
3158 #endif
3159 }
3160
3161 boolean_t
3162 pmap_verify_free(
3163 ppnum_t ppnum)
3164 {
3165 pv_entry_t **pv_h;
3166 int pai;
3167 boolean_t result = TRUE;
3168 pmap_paddr_t phys = ptoa(ppnum);
3169
3170 assert(phys != vm_page_fictitious_addr);
3171
3172 if (!pa_valid(phys))
3173 return (FALSE);
3174
3175 pai = (int)pa_index(phys);
3176 pv_h = pai_to_pvh(pai);
3177
3178 result = (pvh_list(pv_h) == PV_ENTRY_NULL);
3179
3180 return (result);
3181 }
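/*
 * A physical page is considered free here when its pv_head list is empty,
 * i.e. the pmap layer knows of no mappings to it; pages outside the managed
 * range (!pa_valid) are reported as not free.
 */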
3182
3183
3184 /*
3185 * Initialize zones used by pmap.
3186 */
3187 static void
3188 pmap_zone_init(
3189 void)
3190 {
3191 /*
3192 * Create the zone of physical maps
3193 * and the physical-to-virtual entries.
3194 */
3195 pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap)*256,
3196 PAGE_SIZE, "pmap");
3197 }
3198
3199
3200 /*
3201 * Create and return a physical map.
3202 *
3203 * If the size specified for the map
3204 * is zero, the map is an actual physical
3205 * map, and may be referenced by the
3206 * hardware.
3207 *
3208 * If the size specified is non-zero,
3209 * the map will be used in software only, and
3210 * is bounded by that size.
3211 */
3212 static pmap_t
3213 pmap_create_internal(
3214 ledger_t ledger,
3215 vm_map_size_t size,
3216 boolean_t is_64bit)
3217 {
3218 unsigned i;
3219 pmap_t p;
3220
3221 /*
3222 * A software use-only map doesn't even need a pmap.
3223 */
3224 if (size != 0) {
3225 return (PMAP_NULL);
3226 }
3227
3228
3229 /*
3230 * Allocate a pmap struct from the pmap_zone. Then allocate
3231 * the translation table of the right size for the pmap.
3232 */
3233 if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL)
3234 return (PMAP_NULL);
3235
3236 if (is_64bit) {
3237 p->min = MACH_VM_MIN_ADDRESS;
3238 p->max = MACH_VM_MAX_ADDRESS;
3239 } else {
3240 p->min = VM_MIN_ADDRESS;
3241 p->max = VM_MAX_ADDRESS;
3242 }
3243
3244 p->wired = 0;
3245 p->ref_count = 1;
3246 p->gc_status = 0;
3247 p->stamp = hw_atomic_add(&pmap_stamp, 1);
3248 p->nx_enabled = TRUE;
3249 p->is_64bit = is_64bit;
3250 p->nested = FALSE;
3251 p->nested_pmap = PMAP_NULL;
3252
3253
3254 ledger_reference(ledger);
3255 p->ledger = ledger;
3256
3257 PMAP_LOCK_INIT(p);
3258 #if (__ARM_VMSA__ == 7)
3259 simple_lock_init(&p->tt1_lock, 0);
3260 #endif
3261 memset((void *) &p->stats, 0, sizeof(p->stats));
3262
3263 p->tt_entry_free = (tt_entry_t *)0;
3264
3265 p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
3266 p->ttep = ml_static_vtop((vm_offset_t)p->tte);
3267
3268 #if (__ARM_VMSA__ == 7)
3269 p->tte_index_max = NTTES;
3270 #else
3271 p->tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t));
3272 #endif
3273 p->prev_tte = (tt_entry_t *) NULL;
3274 p->cpu_ref = 0;
3275
3276 /* nullify the translation table */
3277 for (i = 0; i < p->tte_index_max; i++)
3278 p->tte[i] = ARM_TTE_TYPE_FAULT;
3279
3280 #ifndef __ARM_L1_PTW__
3281 CleanPoU_DcacheRegion((vm_offset_t) (p->tte), PMAP_ROOT_ALLOC_SIZE);
3282 #else
3283 __asm__ volatile("dsb ish");
3284 #endif
3285 /* assign an asid */
3286 p->vasid = alloc_asid();
3287 p->asid = p->vasid % ARM_MAX_ASID;
3288
3289 /*
3290 * initialize the rest of the structure
3291 */
3292 p->nested_region_grand_addr = 0x0ULL;
3293 p->nested_region_subord_addr = 0x0ULL;
3294 p->nested_region_size = 0x0ULL;
3295 p->nested_region_asid_bitmap = NULL;
3296 p->nested_region_asid_bitmap_size = 0x0UL;
3297
3298 #if MACH_ASSERT
3299 p->pmap_stats_assert = TRUE;
3300 p->pmap_pid = 0;
3301 strlcpy(p->pmap_procname, "<nil>", sizeof (p->pmap_procname));
3302 #endif /* MACH_ASSERT */
3303 #if DEVELOPMENT || DEBUG
3304 p->footprint_suspended = FALSE;
3305 p->footprint_was_suspended = FALSE;
3306 #endif /* DEVELOPMENT || DEBUG */
3307
3308 simple_lock(&pmaps_lock);
3309 queue_enter(&map_pmap_list, p, pmap_t, pmaps);
3310 simple_unlock(&pmaps_lock);
3311
3312 return (p);
3313 }
3314
3315 pmap_t
3316 pmap_create(
3317 ledger_t ledger,
3318 vm_map_size_t size,
3319 boolean_t is_64bit)
3320 {
3321 pmap_t pmap;
3322
3323 PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, is_64bit);
3324
3325 pmap = pmap_create_internal(ledger, size, is_64bit);
3326
3327 PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
3328 VM_KERNEL_ADDRHIDE(pmap));
3329
3330 return pmap;
3331 }
3332
3333 #if MACH_ASSERT
3334 static void
3335 pmap_set_process_internal(
3336 __unused pmap_t pmap,
3337 __unused int pid,
3338 __unused char *procname)
3339 {
3340 #if MACH_ASSERT
3341 if (pmap == NULL) {
3342 return;
3343 }
3344
3345 pmap->pmap_pid = pid;
3346 strlcpy(pmap->pmap_procname, procname, sizeof (pmap->pmap_procname));
3347 if (!strncmp(procname, "corecaptured", sizeof (pmap->pmap_procname))) {
3348 /*
3349 * XXX FBDP
3350 * "corecaptured" somehow triggers some issues that make
3351 * the pmap stats and ledgers to go off track, causing
3352 * some assertion failures and ledger panics.
3353 * Turn that off if the terminating process is "corecaptured".
3354 */
3355 pmap->pmap_stats_assert = FALSE;
3356 ledger_disable_panic_on_negative(pmap->ledger,
3357 task_ledgers.phys_footprint);
3358 ledger_disable_panic_on_negative(pmap->ledger,
3359 task_ledgers.internal);
3360 ledger_disable_panic_on_negative(pmap->ledger,
3361 task_ledgers.internal_compressed);
3362 ledger_disable_panic_on_negative(pmap->ledger,
3363 task_ledgers.iokit_mapped);
3364 ledger_disable_panic_on_negative(pmap->ledger,
3365 task_ledgers.alternate_accounting);
3366 ledger_disable_panic_on_negative(pmap->ledger,
3367 task_ledgers.alternate_accounting_compressed);
3368 }
3369 #endif /* MACH_ASSERT */
3370 }
3371 #endif /* MACH_ASSERT*/
3372
3373 #if MACH_ASSERT
3374 void
3375 pmap_set_process(
3376 pmap_t pmap,
3377 int pid,
3378 char *procname)
3379 {
3380 pmap_set_process_internal(pmap, pid, procname);
3381 }
3382
3383 /*
3384 * We maintain stats and ledgers so that a task's physical footprint is:
3385 * phys_footprint = ((internal - alternate_accounting)
3386 * + (internal_compressed - alternate_accounting_compressed)
3387 * + iokit_mapped
3388 * + purgeable_nonvolatile
3389 * + purgeable_nonvolatile_compressed
3390 * + page_table)
3391 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
3392 */
3393
3394 struct {
3395 uint64_t num_pmaps_checked;
3396
3397 int phys_footprint_over;
3398 ledger_amount_t phys_footprint_over_total;
3399 ledger_amount_t phys_footprint_over_max;
3400 int phys_footprint_under;
3401 ledger_amount_t phys_footprint_under_total;
3402 ledger_amount_t phys_footprint_under_max;
3403
3404 int internal_over;
3405 ledger_amount_t internal_over_total;
3406 ledger_amount_t internal_over_max;
3407 int internal_under;
3408 ledger_amount_t internal_under_total;
3409 ledger_amount_t internal_under_max;
3410
3411 int internal_compressed_over;
3412 ledger_amount_t internal_compressed_over_total;
3413 ledger_amount_t internal_compressed_over_max;
3414 int internal_compressed_under;
3415 ledger_amount_t internal_compressed_under_total;
3416 ledger_amount_t internal_compressed_under_max;
3417
3418 int iokit_mapped_over;
3419 ledger_amount_t iokit_mapped_over_total;
3420 ledger_amount_t iokit_mapped_over_max;
3421 int iokit_mapped_under;
3422 ledger_amount_t iokit_mapped_under_total;
3423 ledger_amount_t iokit_mapped_under_max;
3424
3425 int alternate_accounting_over;
3426 ledger_amount_t alternate_accounting_over_total;
3427 ledger_amount_t alternate_accounting_over_max;
3428 int alternate_accounting_under;
3429 ledger_amount_t alternate_accounting_under_total;
3430 ledger_amount_t alternate_accounting_under_max;
3431
3432 int alternate_accounting_compressed_over;
3433 ledger_amount_t alternate_accounting_compressed_over_total;
3434 ledger_amount_t alternate_accounting_compressed_over_max;
3435 int alternate_accounting_compressed_under;
3436 ledger_amount_t alternate_accounting_compressed_under_total;
3437 ledger_amount_t alternate_accounting_compressed_under_max;
3438
3439 int page_table_over;
3440 ledger_amount_t page_table_over_total;
3441 ledger_amount_t page_table_over_max;
3442 int page_table_under;
3443 ledger_amount_t page_table_under_total;
3444 ledger_amount_t page_table_under_max;
3445
3446 int purgeable_volatile_over;
3447 ledger_amount_t purgeable_volatile_over_total;
3448 ledger_amount_t purgeable_volatile_over_max;
3449 int purgeable_volatile_under;
3450 ledger_amount_t purgeable_volatile_under_total;
3451 ledger_amount_t purgeable_volatile_under_max;
3452
3453 int purgeable_nonvolatile_over;
3454 ledger_amount_t purgeable_nonvolatile_over_total;
3455 ledger_amount_t purgeable_nonvolatile_over_max;
3456 int purgeable_nonvolatile_under;
3457 ledger_amount_t purgeable_nonvolatile_under_total;
3458 ledger_amount_t purgeable_nonvolatile_under_max;
3459
3460 int purgeable_volatile_compressed_over;
3461 ledger_amount_t purgeable_volatile_compressed_over_total;
3462 ledger_amount_t purgeable_volatile_compressed_over_max;
3463 int purgeable_volatile_compressed_under;
3464 ledger_amount_t purgeable_volatile_compressed_under_total;
3465 ledger_amount_t purgeable_volatile_compressed_under_max;
3466
3467 int purgeable_nonvolatile_compressed_over;
3468 ledger_amount_t purgeable_nonvolatile_compressed_over_total;
3469 ledger_amount_t purgeable_nonvolatile_compressed_over_max;
3470 int purgeable_nonvolatile_compressed_under;
3471 ledger_amount_t purgeable_nonvolatile_compressed_under_total;
3472 ledger_amount_t purgeable_nonvolatile_compressed_under_max;
3473 } pmap_ledgers_drift;
3474 #endif /* MACH_ASSERT */
3475
3476 /*
3477 * Retire the given physical map from service.
3478 * Should only be called if the map contains
3479 * no valid mappings.
3480 */
3481 static void
3482 pmap_destroy_internal(
3483 pmap_t pmap)
3484 {
3485 #if (__ARM_VMSA__ == 7)
3486 pt_entry_t *ttep;
3487 unsigned int i;
3488 pmap_t tmp_pmap, tt_pmap;
3489 queue_head_t tmp_pmap_list;
3490
3491 queue_init(&tmp_pmap_list);
3492 simple_lock(&pmaps_lock);
3493 tt_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&tt_pmap_list));
3494 while (!queue_end(&tt_pmap_list, (queue_entry_t)tt_pmap)) {
3495 if (tt_pmap->cpu_ref == 0 ) {
3496 tmp_pmap = tt_pmap;
3497 tt_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&tmp_pmap->pmaps));
3498 queue_remove(&tt_pmap_list, tmp_pmap, pmap_t, pmaps);
3499 tt_pmap_count--;
3500 queue_enter(&tmp_pmap_list, tmp_pmap, pmap_t, pmaps);
3501 } else {
3502 tmp_pmap = tt_pmap;
3503 tt_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&tmp_pmap->pmaps));
3504 }
3505 }
3506 simple_unlock(&pmaps_lock);
3507
3508 tmp_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&tmp_pmap_list));
3509 while (!queue_end(&tmp_pmap_list, (queue_entry_t)tmp_pmap)) {
3510 tt_pmap = tmp_pmap;
3511 tmp_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&tt_pmap->pmaps));
3512 queue_remove(&tmp_pmap_list, tt_pmap, pmap_t, pmaps);
3513 if (tt_pmap->tte) {
3514 pmap_tt1_deallocate(pmap, tt_pmap->tte, tt_pmap->tte_index_max*sizeof(tt_entry_t), 0);
3515 tt_pmap->tte = (tt_entry_t *) NULL;
3516 tt_pmap->ttep = 0;
3517 tt_pmap->tte_index_max = 0;
3518 }
3519 if (tt_pmap->prev_tte) {
3520 pmap_tt1_deallocate(pmap, tt_pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
3521 tt_pmap->prev_tte = (tt_entry_t *) NULL;
3522 }
3523 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3524 free_asid(tt_pmap->vasid);
3525
3526 pmap_check_ledgers(tt_pmap);
3527 ledger_dereference(tt_pmap->ledger);
3528
3529 zfree(pmap_zone, tt_pmap);
3530 }
3531
3532 if (pmap == PMAP_NULL)
3533 return;
3534
3535 if (hw_atomic_sub(&pmap->ref_count, 1) != 0)
3536 return;
3537
3538 simple_lock(&pmaps_lock);
3539
3540 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
3541 pmap->gc_status |= PMAP_GC_WAIT;
3542 assert_wait((event_t) & pmap->gc_status, THREAD_UNINT);
3543 simple_unlock(&pmaps_lock);
3544 (void) thread_block(THREAD_CONTINUE_NULL);
3545 simple_lock(&pmaps_lock);
3546
3547 }
3548
3549 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
3550 simple_unlock(&pmaps_lock);
3551
3552 /*
3553 * Free the memory maps, then the
3554 * pmap structure.
3555 */
3556 PMAP_LOCK(pmap);
3557 for (i = 0; i < pmap->tte_index_max; i++) {
3558 ttep = &pmap->tte[i];
3559 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3560 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
3561 flush_mmu_tlb_entry((i<<ARM_TT_L1_SHIFT) | (pmap->asid & 0xff));
3562 }
3563 }
3564 PMAP_UNLOCK(pmap);
3565
3566 if (pmap->cpu_ref == 0) {
3567 if (pmap->tte) {
3568 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max*sizeof(tt_entry_t), 0);
3569 pmap->tte = (tt_entry_t *) NULL;
3570 pmap->ttep = 0;
3571 pmap->tte_index_max = 0;
3572 }
3573 if (pmap->prev_tte) {
3574 pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
3575 pmap->prev_tte = (tt_entry_t *) NULL;
3576 }
3577 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3578
3579 /* return its asid to the pool */
3580 free_asid(pmap->vasid);
3581 pmap_check_ledgers(pmap);
3582
3583 ledger_dereference(pmap->ledger);
3584 if (pmap->nested_region_asid_bitmap)
3585 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size*sizeof(unsigned int));
3586 zfree(pmap_zone, pmap);
3587 } else {
3588 simple_lock(&pmaps_lock);
3589 queue_enter(&tt_pmap_list, pmap, pmap_t, pmaps);
3590 tt_pmap_count++;
3591 if (tt_pmap_count > tt_pmap_max)
3592 tt_pmap_max = tt_pmap_count;
3593 simple_unlock(&pmaps_lock);
3594 }
3595 #else
3596 pt_entry_t *ttep;
3597 pmap_paddr_t pa;
3598 vm_map_address_t c;
3599
3600 if (pmap == PMAP_NULL) {
3601 return;
3602 }
3603
3604 pmap_unmap_sharedpage(pmap);
3605
3606 if (hw_atomic_sub(&pmap->ref_count, 1) == 0) {
3607
3608 simple_lock(&pmaps_lock);
3609 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
3610 pmap->gc_status |= PMAP_GC_WAIT;
3611 assert_wait((event_t) & pmap->gc_status, THREAD_UNINT);
3612 simple_unlock(&pmaps_lock);
3613 (void) thread_block(THREAD_CONTINUE_NULL);
3614 simple_lock(&pmaps_lock);
3615 }
3616 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
3617 simple_unlock(&pmaps_lock);
3618
3619 /*
3620 * Free the memory maps, then the
3621 * pmap structure.
3622 */
3623 for (c = pmap->min; c < pmap->max; c += ARM_TT_L2_SIZE) {
3624 ttep = pmap_tt2e(pmap, c);
3625 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3626 PMAP_LOCK(pmap);
3627 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L2_LEVEL);
3628 PMAP_UNLOCK(pmap);
3629 flush_mmu_tlb_entry(tlbi_addr(c) | tlbi_asid(pmap->asid));
3630 }
3631 }
3632 #if !__ARM64_TWO_LEVEL_PMAP__
3633 for (c = pmap->min; c < pmap->max; c += ARM_TT_L1_SIZE) {
3634 ttep = pmap_tt1e(pmap, c);
3635 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3636 PMAP_LOCK(pmap);
3637 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
3638 PMAP_UNLOCK(pmap);
3639 }
3640 }
3641 #endif
3642
3643 if (pmap->tte) {
3644 pa = pmap->ttep;
3645 pmap_tt1_deallocate(pmap, (tt_entry_t *)phystokv(pa), PMAP_ROOT_ALLOC_SIZE, 0);
3646 }
3647
3648
3649 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3650 flush_mmu_tlb_asid((uint64_t)(pmap->asid) << TLBI_ASID_SHIFT);
3651 free_asid(pmap->vasid);
3652
3653 if (pmap->nested_region_asid_bitmap) {
3654 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size*sizeof(unsigned int));
3655 }
3656
3657 pmap_check_ledgers(pmap);
3658 ledger_dereference(pmap->ledger);
3659
3660 zfree(pmap_zone, pmap);
3661 }
3662
3663 #endif
3664 }
3665
3666 void
3667 pmap_destroy(
3668 pmap_t pmap)
3669 {
3670 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
3671 VM_KERNEL_ADDRHIDE(pmap));
3672
3673 pmap_destroy_internal(pmap);
3674
3675 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
3676 }
3677
3678
3679 /*
3680 * Add a reference to the specified pmap.
3681 */
3682 static void
3683 pmap_reference_internal(
3684 pmap_t pmap)
3685 {
3686 if (pmap != PMAP_NULL) {
3687 (void) hw_atomic_add(&pmap->ref_count, 1);
3688 }
3689 }
3690
3691 void
3692 pmap_reference(
3693 pmap_t pmap)
3694 {
3695 pmap_reference_internal(pmap);
3696 }
3697
3698 static tt_entry_t *
3699 pmap_tt1_allocate(
3700 pmap_t pmap,
3701 vm_size_t size,
3702 unsigned option)
3703 {
3704 tt_entry_t *tt1;
3705 tt_free_entry_t *tt1_free;
3706 pmap_paddr_t pa;
3707 vm_address_t va;
3708 vm_address_t va_end;
3709 kern_return_t ret;
3710
3711 simple_lock(&pmaps_lock);
3712 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
3713 free_page_size_tt_count--;
3714 tt1 = (tt_entry_t *)free_page_size_tt_list;
3715 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
3716 simple_unlock(&pmaps_lock);
3717 pmap_tt_ledger_credit(pmap, size);
3718 return (tt_entry_t *)tt1;
3719 };
3720 if ((size == 2*PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
3721 free_two_page_size_tt_count--;
3722 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
3723 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
3724 simple_unlock(&pmaps_lock);
3725 pmap_tt_ledger_credit(pmap, size);
3726 return (tt_entry_t *)tt1;
3727 };
3728 if (free_tt_count != 0) {
3729 free_tt_count--;
3730 tt1 = (tt_entry_t *)free_tt_list;
3731 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
3732 simple_unlock(&pmaps_lock);
3733 pmap_tt_ledger_credit(pmap, size);
3734 return (tt_entry_t *)tt1;
3735 }
3736
3737 simple_unlock(&pmaps_lock);
3738
3739 ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
3740
3741 if(ret == KERN_RESOURCE_SHORTAGE)
3742 return (tt_entry_t *)0;
3743
3744
3745 if (size < PAGE_SIZE) {
3746 simple_lock(&pmaps_lock);
3747
3748 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + size; va < va_end; va = va+size) {
3749 tt1_free = (tt_free_entry_t *)va;
3750 tt1_free->next = free_tt_list;
3751 free_tt_list = tt1_free;
3752 free_tt_count++;
3753 }
3754 if (free_tt_count > free_tt_max)
3755 free_tt_max = free_tt_count;
3756
3757 simple_unlock(&pmaps_lock);
3758 }
3759
3760 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
3761 * Depending on the device, this can vary between 512b and 16K. */
3762 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3763 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
3764 pmap_tt_ledger_credit(pmap, size);
3765
3766 return (tt_entry_t *) phystokv(pa);
3767 }
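/*
 * When the requested root table size is smaller than a page (possible on
 * devices where PMAP_ROOT_ALLOC_SIZE is sub-page), the remainder of the
 * freshly allocated page is carved into size-sized chunks and threaded onto
 * free_tt_list above, so later sub-page allocations can be satisfied without
 * going back to pmap_pages_alloc().
 */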
3768
3769 static void
3770 pmap_tt1_deallocate(
3771 pmap_t pmap,
3772 tt_entry_t *tt,
3773 vm_size_t size,
3774 unsigned option)
3775 {
3776 tt_free_entry_t *tt_entry;
3777
3778 tt_entry = (tt_free_entry_t *)tt;
3779 if (not_in_kdp)
3780 simple_lock(&pmaps_lock);
3781
3782 if (size < PAGE_SIZE) {
3783 free_tt_count++;
3784 if (free_tt_count > free_tt_max)
3785 free_tt_max = free_tt_count;
3786 tt_entry->next = free_tt_list;
3787 free_tt_list = tt_entry;
3788 }
3789
3790 if (size == PAGE_SIZE) {
3791 free_page_size_tt_count++;
3792 if (free_page_size_tt_count > free_page_size_tt_max)
3793 free_page_size_tt_max = free_page_size_tt_count;
3794 tt_entry->next = free_page_size_tt_list;
3795 free_page_size_tt_list = tt_entry;
3796 }
3797
3798 if (size == 2*PAGE_SIZE) {
3799 free_two_page_size_tt_count++;
3800 if (free_two_page_size_tt_count > free_two_page_size_tt_max)
3801 free_two_page_size_tt_max = free_two_page_size_tt_count;
3802 tt_entry->next = free_two_page_size_tt_list;
3803 free_two_page_size_tt_list = tt_entry;
3804 }
3805
3806 if ((option & PMAP_TT_DEALLOCATE_NOBLOCK) || (!not_in_kdp)) {
3807 if (not_in_kdp)
3808 simple_unlock(&pmaps_lock);
3809 pmap_tt_ledger_debit(pmap, size);
3810 return;
3811 }
3812
3813 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
3814
3815 free_page_size_tt_count--;
3816 tt = (tt_entry_t *)free_page_size_tt_list;
3817 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
3818
3819 simple_unlock(&pmaps_lock);
3820
3821 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
3822
3823 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3824
3825 simple_lock(&pmaps_lock);
3826 }
3827
3828 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
3829 free_two_page_size_tt_count--;
3830 tt = (tt_entry_t *)free_two_page_size_tt_list;
3831 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
3832
3833 simple_unlock(&pmaps_lock);
3834
3835 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2*PAGE_SIZE);
3836
3837 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3838
3839 simple_lock(&pmaps_lock);
3840 }
3841 simple_unlock(&pmaps_lock);
3842 pmap_tt_ledger_debit(pmap, size);
3843 }
3844
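/*
 * Allocate a translation table of the given level for this pmap.  The
 * pmap's private tt_entry_free list is tried first; otherwise a fresh VM
 * page is allocated, the per-level counters and ledgers are updated, and,
 * under TEST_PAGE_RATIO_4, the remainder of the VM page is carved into
 * additional ARM_PGBYTES-sized tables chained onto tt_entry_free.
 */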
3845 static kern_return_t
3846 pmap_tt_allocate(
3847 pmap_t pmap,
3848 tt_entry_t **ttp,
3849 unsigned int level,
3850 unsigned int options)
3851 {
3852 pmap_paddr_t pa;
3853 *ttp = NULL;
3854
3855 PMAP_LOCK(pmap);
3856 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
3857 tt_free_entry_t *tt_free_next;
3858
3859 tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
3860 *ttp = (tt_entry_t *)pmap->tt_entry_free;
3861 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
3862 }
3863 PMAP_UNLOCK(pmap);
3864
3865 if (*ttp == NULL) {
3866 pt_desc_t *ptdp;
3867
3868 /*
3869 * Allocate a VM page for the level x page table entries.
3870 */
3871 while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
3872 if(options & PMAP_OPTIONS_NOWAIT) {
3873 return KERN_RESOURCE_SHORTAGE;
3874 }
3875 VM_PAGE_WAIT();
3876 }
3877
3878 if (level < PMAP_TT_MAX_LEVEL) {
3879 OSAddAtomic64(1, &alloc_ttepages_count);
3880 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
3881 } else {
3882 OSAddAtomic64(1, &alloc_ptepages_count);
3883 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
3884 }
3885
3886 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
3887
3888 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
3889
3890 ptdp = ptd_alloc(pmap);
3891 *(pt_desc_t **)pai_to_pvh(pa_index(pa)) = ptdp;
3892
3893 __unreachable_ok_push
3894 if (TEST_PAGE_RATIO_4) {
3895 vm_address_t va;
3896 vm_address_t va_end;
3897
3898 PMAP_LOCK(pmap);
3899
3900 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va+ARM_PGBYTES) {
3901 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
3902 pmap->tt_entry_free = (tt_entry_t *)va;
3903 }
3904 PMAP_UNLOCK(pmap);
3905 }
3906 __unreachable_ok_pop
3907
3908 *ttp = (tt_entry_t *)phystokv(pa);
3909 }
3910
3911
3912 return KERN_SUCCESS;
3913 }
3914
3915
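/*
 * Return a translation table to the pmap's tt_entry_free list.  If every
 * ARM_PGBYTES-sized table within the containing page is now unreferenced
 * and sitting on the free list, those entries are unlinked and the whole
 * page is handed back to pmap_pages_free(), with the counters, zone info
 * and ledgers adjusted to match.
 */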
3916 static void
3917 pmap_tt_deallocate(
3918 pmap_t pmap,
3919 tt_entry_t *ttp,
3920 unsigned int level)
3921 {
3922 pt_desc_t *ptdp;
3923 unsigned pt_acc_cnt;
3924 unsigned i, max_pt_index = PAGE_RATIO;
3925 vm_offset_t free_page=0;
3926
3927 PMAP_LOCK(pmap);
3928
3929 ptdp = ptep_get_ptd((vm_offset_t)ttp);
3930
3931 if (level < PMAP_TT_MAX_LEVEL) {
3932
3933 if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)
3934 ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
3935 }
3936
3937 ptdp->pt_map[ARM_PT_DESC_INDEX(ttp)].va = 0;
3938
3939 if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt != 0)
3940 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt);
3941
3942 for (i = 0, pt_acc_cnt = 0 ; i < max_pt_index ; i++)
3943 pt_acc_cnt += ptdp->pt_cnt[i].refcnt;
3944
3945 if (pt_acc_cnt == 0) {
3946 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
3947 unsigned pt_free_entry_cnt = 1;
3948
3949 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
3950 tt_free_entry_t *tt_free_list_next;
3951
3952 tt_free_list_next = tt_free_list->next;
3953 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
3954 pt_free_entry_cnt++;
3955 }
3956 tt_free_list = tt_free_list_next;
3957 }
3958 if (pt_free_entry_cnt == max_pt_index) {
3959 tt_free_entry_t *tt_free_list_cur;
3960
3961 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
3962 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
3963 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
3964
3965 while (tt_free_list_cur) {
3966 tt_free_entry_t *tt_free_list_next;
3967
3968 tt_free_list_next = tt_free_list_cur->next;
3969 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
3970 tt_free_list->next = tt_free_list_next->next;
3971 } else {
3972 tt_free_list = tt_free_list_next;
3973 }
3974 tt_free_list_cur = tt_free_list_next;
3975 }
3976 } else {
3977 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
3978 pmap->tt_entry_free = ttp;
3979 }
3980 } else {
3981 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
3982 pmap->tt_entry_free = ttp;
3983 }
3984
3985 PMAP_UNLOCK(pmap);
3986
3987 if (free_page != 0) {
3988
3989 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
3990 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
3991 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
3992 if (level < PMAP_TT_MAX_LEVEL)
3993 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
3994 else
3995 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
3996 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
3997 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
3998 }
3999 }
4000
4001 static void
4002 pmap_tte_deallocate(
4003 pmap_t pmap,
4004 tt_entry_t *ttep,
4005 unsigned int level)
4006 {
4007 pmap_paddr_t pa;
4008 tt_entry_t tte;
4009
4010 PMAP_ASSERT_LOCKED(pmap);
4011
4012 tte = *ttep;
4013
4014 if (tte == 0) {
4015 panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
4016 }
4017
4018 #if MACH_ASSERT
4019 if (tte_get_ptd(tte)->pmap != pmap) {
4020 panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
4021 tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
4022 }
4023 #endif
4024 if (((level+1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
4025 panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
4026 tte_get_ptd(tte), (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
4027 }
4028
4029 #if (__ARM_VMSA__ == 7)
4030 {
4031 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
4032 unsigned i;
4033
4034 for (i = 0; i<4; i++, ttep_4M++)
4035 *ttep_4M = (tt_entry_t) 0;
4036 }
4037 #else
4038 *ttep = (tt_entry_t) 0;
4039 #endif
4040
4041 #ifndef __ARM_L1_PTW__
4042 CleanPoU_DcacheRegion((vm_offset_t) ttep, sizeof(tt_entry_t));
4043 #else
4044 __asm__ volatile("dsb ish");
4045 #endif
4046 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4047 #if MACH_ASSERT
4048 {
4049 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
4050 unsigned i;
4051
4052 for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++,pte_p++) {
4053 if (ARM_PTE_IS_COMPRESSED(*pte_p)) {
4054 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
4055 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4056 } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
4057 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
4058 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4059 }
4060 }
4061 }
4062 #endif
4063 PMAP_UNLOCK(pmap);
4064
4065 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
4066 * aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
4067 pa = tte_to_pa(tte) & ~ARM_PGMASK;
4068 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level+1);
4069 PMAP_LOCK(pmap);
4070 }
4071 }
4072
4073 /*
4074 * Remove a range of hardware page-table entries.
4075 * The entries given are the first (inclusive)
4076 * and last (exclusive) entries for the VM pages.
4077 * The virtual address is the va for the first pte.
4078 *
4079 * The pmap must be locked.
4080 * If the pmap is not the kernel pmap, the range must lie
4081 * entirely within one pte-page. This is NOT checked.
4082 * Assumes that the pte-page exists.
4083 *
4084 * Returns the number of PTEs changed, and sets *rmv_cnt
4085 * to the number of SPTEs changed.
4086 */
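/*
 * Illustrative only: a caller removing the single PTE that backs one VM
 * page at "va" (mirroring the use in pmap_enter_options_internal() below)
 * would invoke
 *
 *	pmap_remove_range(pmap, va, pte_p, pte_p + 1, 0);
 *
 * where the final argument is the optional rmv_cnt out-parameter.
 */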
4087 static int
4088 pmap_remove_range(
4089 pmap_t pmap,
4090 vm_map_address_t va,
4091 pt_entry_t *bpte,
4092 pt_entry_t *epte,
4093 uint32_t *rmv_cnt)
4094 {
4095 return pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
4096 PMAP_OPTIONS_REMOVE);
4097 }
4098
4099 #if MACH_ASSERT
4100 int num_reusable_mismatch = 0;
4101 #endif /* MACH_ASSERT */
4102
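/*
 * Worker for pmap_remove_range() and pmap_remove_options_internal(): walk
 * the PTEs from bpte to epte, clearing "compressed" markers, invalidating
 * live mappings, unlinking the corresponding PV entries, and finally
 * reconciling the pmap's statistics and ledgers.  Returns the number of
 * PTEs actually changed so the caller knows whether a TLB flush is needed.
 */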
4103 static int
4104 pmap_remove_range_options(
4105 pmap_t pmap,
4106 vm_map_address_t va,
4107 pt_entry_t *bpte,
4108 pt_entry_t *epte,
4109 uint32_t *rmv_cnt,
4110 int options)
4111 {
4112 pt_entry_t *cpte;
4113 int num_removed, num_unwired;
4114 int num_pte_changed;
4115 int pai = 0;
4116 pmap_paddr_t pa;
4117 int num_external, num_internal, num_reusable;
4118 int num_alt_internal;
4119 uint64_t num_compressed, num_alt_compressed;
4120
4121 PMAP_ASSERT_LOCKED(pmap);
4122
4123 num_removed = 0;
4124 num_unwired = 0;
4125 num_pte_changed = 0;
4126 num_external = 0;
4127 num_internal = 0;
4128 num_reusable = 0;
4129 num_compressed = 0;
4130 num_alt_internal = 0;
4131 num_alt_compressed = 0;
4132
4133 for (cpte = bpte; cpte < epte;
4134 cpte += PAGE_SIZE/ARM_PGBYTES, va += PAGE_SIZE) {
4135 pv_entry_t **pv_h, **pve_pp;
4136 pv_entry_t *pve_p;
4137 pt_entry_t spte;
4138 boolean_t managed=FALSE;
4139
4140 spte = *cpte;
4141
4142 #if CONFIG_PGTRACE
4143 if (pgtrace_enabled) {
4144 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
4145 }
4146 #endif
4147
4148 while (!managed) {
4149 if (pmap != kernel_pmap &&
4150 (options & PMAP_OPTIONS_REMOVE) &&
4151 (ARM_PTE_IS_COMPRESSED(spte))) {
4152 /*
4153 * "pmap" must be locked at this point,
4154 * so this should not race with another
4155 * pmap_remove_range() or pmap_enter().
4156 */
4157
4158 /* one less "compressed"... */
4159 num_compressed++;
4160 if (spte & ARM_PTE_COMPRESSED_ALT) {
4161 /* ... but it used to be "ALTACCT" */
4162 num_alt_compressed++;
4163 }
4164
4165 /* clear marker */
4166 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4167 /*
4168 * "refcnt" also accounts for
4169 * our "compressed" markers,
4170 * so let's update it here.
4171 */
4172 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0)
4173 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4174 spte = *cpte;
4175 }
4176 /*
4177 * It may be possible for the pte to transition from managed
4178 * to unmanaged in this timeframe; for now, elide the assert.
4179 * We should break out as a consequence of checking pa_valid.
4180 */
4181 //assert(!ARM_PTE_IS_COMPRESSED(spte));
4182 pa = pte_to_pa(spte);
4183 if (!pa_valid(pa)) {
4184 break;
4185 }
4186 pai = (int)pa_index(pa);
4187 LOCK_PVH(pai);
4188 spte = *cpte;
4189 pa = pte_to_pa(spte);
4190 if (pai == (int)pa_index(pa)) {
4191 managed = TRUE;
4192 break; // Leave pai locked as we will unlock it after we free the PV entry
4193 }
4194 UNLOCK_PVH(pai);
4195 }
4196
4197 if (ARM_PTE_IS_COMPRESSED(*cpte)) {
4198 /*
4199 * There used to be a valid mapping here but it
4200 * has already been removed when the page was
4201 * sent to the VM compressor, so nothing left to
4202 * remove now...
4203 */
4204 continue;
4205 }
4206
4207 /* remove the translation, do not flush the TLB */
4208 if (*cpte != ARM_PTE_TYPE_FAULT) {
4209 assert(!ARM_PTE_IS_COMPRESSED(*cpte));
4210 #if MACH_ASSERT
4211 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
4212 panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
4213 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
4214 }
4215 #endif
4216 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4217 num_pte_changed++;
4218 }
4219
4220 if ((spte != ARM_PTE_TYPE_FAULT) &&
4221 (pmap != kernel_pmap)) {
4222 assert(!ARM_PTE_IS_COMPRESSED(spte));
4223 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0)
4224 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4225 if(rmv_cnt) (*rmv_cnt)++;
4226 }
4227
4228 if (pte_is_wired(spte)) {
4229 pte_set_wired(cpte, 0);
4230 num_unwired++;
4231 }
4232 /*
4233 * if not managed, we're done
4234 */
4235 if (!managed)
4236 continue;
4237 /*
4238 * find and remove the mapping from the chain for this
4239 * physical address.
4240 */
4241 ASSERT_PVH_LOCKED(pai); // Should have been locked when we found the managed PTE above
4242 pv_h = pai_to_pvh(pai);
4243
4244 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
4245 if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0))
4246 panic("pmap_remove_range(): cpte=%p (0x%llx) does not match pv_h=%p (%p)\n", cpte, (uint64_t)spte, pv_h, pvh_ptep(pv_h));
4247 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
4248 assert(IS_INTERNAL_PAGE(pai));
4249 num_internal++;
4250 num_alt_internal++;
4251 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
4252 } else if (IS_INTERNAL_PAGE(pai)) {
4253 if (IS_REUSABLE_PAGE(pai)) {
4254 num_reusable++;
4255 } else {
4256 num_internal++;
4257 }
4258 } else {
4259 num_external++;
4260 }
4261 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
4262 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
4263
4264 pve_pp = pv_h;
4265 pve_p = pvh_list(pv_h);
4266
4267 while (pve_p != PV_ENTRY_NULL &&
4268 (pve_get_ptep(pve_p) != cpte)) {
4269 pve_pp = pve_link_field(pve_p);
4270 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4271 }
4272
4273 if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
4274 UNLOCK_PVH(pai);
4275 panic("pmap_remove_range(): cpte=%p (0x%llx) not in pv_h=%p\n", cpte, (uint64_t)spte, pv_h);
4276 }
4277
4278 #if MACH_ASSERT
4279 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
4280 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4281 while (check_pve_p != PV_ENTRY_NULL) {
4282 if (pve_get_ptep(check_pve_p) == cpte) {
4283 panic("pmap_remove_range(): duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, va=0x%llx\n",
4284 cpte, pmap, pv_h, pve_p, (uint64_t)spte, (uint64_t)va);
4285 }
4286 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
4287 }
4288 }
4289 #endif
4290
4291 if (IS_ALTACCT_PAGE(pai, pve_p)) {
4292 assert(IS_INTERNAL_PAGE(pai));
4293 num_internal++;
4294 num_alt_internal++;
4295 CLR_ALTACCT_PAGE(pai, pve_p);
4296 } else if (IS_INTERNAL_PAGE(pai)) {
4297 if (IS_REUSABLE_PAGE(pai)) {
4298 num_reusable++;
4299 } else {
4300 num_internal++;
4301 }
4302 } else {
4303 num_external++;
4304 }
4305
4306 pvh_remove(pv_h, pve_pp, pve_p);
4307 pv_free(pve_p);
4308 } else {
4309 panic("pmap_remove_range(): unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pte=0x%llx va=0x%llx\n",
4310 *pv_h, cpte, pmap, pv_h, (uint64_t)spte, (uint64_t)va);
4311 }
4312
4313 UNLOCK_PVH(pai);
4314 num_removed++;
4315 }
4316
4317 /*
4318 * Update the counts
4319 */
4320 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
4321 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
4322
4323 if (pmap != kernel_pmap) {
4324 /* sanity checks... */
4325 #if MACH_ASSERT
4326 if (pmap->stats.internal < num_internal) {
4327 if ((! pmap_stats_assert ||
4328 ! pmap->pmap_stats_assert) ||
4329 (pmap->stats.internal + pmap->stats.reusable) ==
4330 (num_internal + num_reusable)) {
4331 num_reusable_mismatch++;
4332 printf("pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
4333 pmap,
4334 (uint64_t) va,
4335 bpte,
4336 epte,
4337 options,
4338 num_internal,
4339 num_removed,
4340 num_unwired,
4341 num_external,
4342 num_reusable,
4343 num_compressed,
4344 num_alt_internal,
4345 num_alt_compressed,
4346 num_pte_changed,
4347 pmap->stats.internal,
4348 pmap->stats.reusable);
4349 /* slight mismatch: fix it... */
4350 num_internal = pmap->stats.internal;
4351 num_reusable = pmap->stats.reusable;
4352 } else {
4353 panic("pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
4354 pmap,
4355 (uint64_t) va,
4356 bpte,
4357 epte,
4358 options,
4359 num_internal,
4360 num_removed,
4361 num_unwired,
4362 num_external,
4363 num_reusable,
4364 num_compressed,
4365 num_alt_internal,
4366 num_alt_compressed,
4367 num_pte_changed,
4368 pmap->stats.internal,
4369 pmap->stats.reusable);
4370 }
4371 }
4372 #endif /* MACH_ASSERT */
4373 PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
4374 pmap,
4375 "pmap=%p num_external=%d stats.external=%d",
4376 pmap, num_external, pmap->stats.external);
4377 PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
4378 pmap,
4379 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4380 pmap,
4381 num_internal, pmap->stats.internal,
4382 num_reusable, pmap->stats.reusable);
4383 PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
4384 pmap,
4385 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4386 pmap,
4387 num_internal, pmap->stats.internal,
4388 num_reusable, pmap->stats.reusable);
4389 PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
4390 pmap,
4391 "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
4392 pmap, num_compressed, num_alt_compressed,
4393 pmap->stats.compressed);
4394
4395 /* update pmap stats... */
4396 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
4397 if (num_external)
4398 OSAddAtomic(-num_external, &pmap->stats.external);
4399 if (num_internal)
4400 OSAddAtomic(-num_internal, &pmap->stats.internal);
4401 if (num_reusable)
4402 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
4403 if (num_compressed)
4404 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
4405 /* ... and ledgers */
4406 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
4407 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
4408 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
4409 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
4410 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
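/*
 * Pages using alternate accounting are tracked by the alternate_accounting
 * ledgers (debited above) rather than by phys_footprint, so only the
 * non-altacct internal and non-altacct compressed pages are subtracted
 * from phys_footprint below.
 */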
4411 /* make needed adjustments to phys_footprint */
4412 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
4413 machine_ptob((num_internal -
4414 num_alt_internal) +
4415 (num_compressed -
4416 num_alt_compressed)));
4417 }
4418
4419 /* flush the ptable entries we have written */
4420 if (num_pte_changed > 0)
4421 FLUSH_PTE_RANGE(bpte, epte);
4422
4423 return num_pte_changed;
4424 }
4425
4426
4427 /*
4428 * Remove the given range of addresses
4429 * from the specified map.
4430 *
4431 * It is assumed that the start and end are properly
4432 * rounded to the hardware page size.
4433 */
4434 void
4435 pmap_remove(
4436 pmap_t pmap,
4437 vm_map_address_t start,
4438 vm_map_address_t end)
4439 {
4440 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
4441 }
4442
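/*
 * Remove the mappings in [start, end), which must lie within a single
 * twig (L1 on ARMv7, L2 otherwise) entry.  If this empties the leaf table
 * of a non-nested user pmap, the table itself is deallocated and the
 * twig's TLB entry is flushed.
 */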
4443 static int
4444 pmap_remove_options_internal(pmap_t pmap,
4445 vm_map_address_t start,
4446 vm_map_address_t end,
4447 int options)
4448 {
4449 int remove_count = 0;
4450 pt_entry_t *bpte, *epte;
4451 pt_entry_t *pte_p;
4452 tt_entry_t *tte_p;
4453 uint32_t rmv_spte=0;
4454
4455 PMAP_LOCK(pmap);
4456
4457 tte_p = pmap_tte(pmap, start);
4458
4459 if (tte_p == (tt_entry_t *) NULL) {
4460 goto done;
4461 }
4462
4463 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4464 pte_p = (pt_entry_t *) ttetokv(*tte_p);
4465 bpte = &pte_p[ptenum(start)];
4466 epte = bpte + ((end - start) >> ARM_TT_LEAF_SHIFT);
4467
4468 remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
4469 &rmv_spte, options);
4470
4471 #if (__ARM_VMSA__ == 7)
4472 if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
4473 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
4474 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
4475 flush_mmu_tlb_entry((start & ~ARM_TT_L1_OFFMASK) | (pmap->asid & 0xff));
4476 }
4477 #else
4478 if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
4479 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
4480 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
4481 flush_mmu_tlb_entry(tlbi_addr(start & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
4482 }
4483 #endif
4484 }
4485
4486 done:
4487 PMAP_UNLOCK(pmap);
4488
4489 return remove_count;
4490 }
4491
4492 void
4493 pmap_remove_options(
4494 pmap_t pmap,
4495 vm_map_address_t start,
4496 vm_map_address_t end,
4497 int options)
4498 {
4499 int remove_count = 0;
4500 vm_map_address_t va;
4501
4502 if (pmap == PMAP_NULL)
4503 return;
4504
4505 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
4506 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
4507 VM_KERNEL_ADDRHIDE(end));
4508
4509 #if MACH_ASSERT
4510 if ((start|end) & PAGE_MASK) {
4511 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
4512 pmap, (uint64_t)start, (uint64_t)end);
4513 }
4514 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
4515 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
4516 pmap, (uint64_t)start, (uint64_t)end);
4517 }
4518 #endif
4519
4520 /*
4521 * Invalidate the translation buffer first
4522 */
4523 va = start;
4524 while (va < end) {
4525 vm_map_address_t l;
4526
4527 #if (__ARM_VMSA__ == 7)
4528 l = ((va + ARM_TT_L1_SIZE) & ~ARM_TT_L1_OFFMASK);
4529 #else
4530 l = ((va + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK);
4531 #endif
4532 if (l > end)
4533 l = end;
4534
4535 remove_count += pmap_remove_options_internal(pmap, va, l, options);
4536
4537 va = l;
4538 }
4539
4540
4541 if (remove_count > 0)
4542 PMAP_UPDATE_TLBS(pmap, start, end);
4543
4544 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
4545 }
4546
4547
4548 /*
4549 * Remove phys addr if mapped in specified map
4550 */
4551 void
4552 pmap_remove_some_phys(
4553 __unused pmap_t map,
4554 __unused ppnum_t pn)
4555 {
4556 /* Implement to support working set code */
4557 }
4558
4559
4560 void
4561 pmap_set_pmap(
4562 pmap_t pmap,
4563 #if !__ARM_USER_PROTECT__
4564 __unused
4565 #endif
4566 thread_t thread)
4567 {
4568 pmap_switch(pmap);
4569 #if __ARM_USER_PROTECT__
4570 if (pmap->tte_index_max == NTTES) {
4571 thread->machine.uptw_ttc = 2;
4572 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
4573 } else {
4574 thread->machine.uptw_ttc = 1;
4575 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep ) | TTBR_SETUP;
4576 }
4577 thread->machine.asid = pmap->asid;
4578 #endif
4579 }
4580
4581 static void
4582 pmap_flush_core_tlb_asid(pmap_t pmap)
4583 {
4584 #if (__ARM_VMSA__ == 7)
4585 flush_core_tlb_asid(pmap->asid);
4586 #else
4587 flush_core_tlb_asid(((uint64_t) pmap->asid) << TLBI_ASID_SHIFT);
4588 #if __ARM_KERNEL_PROTECT__
4589 flush_core_tlb_asid(((uint64_t) pmap->asid + 1) << TLBI_ASID_SHIFT);
4590 #endif /* __ARM_KERNEL_PROTECT__ */
4591 #endif
4592 }
4593
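/*
 * Switch this CPU to the given pmap's address space.  The pmap's "virtual"
 * ASID bits are compared against the value last seen on this CPU for its
 * hardware ASID; on a mismatch the core TLB entries for that ASID are
 * flushed to avoid aliasing once the user TTB has been switched.
 */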
4594 static void
4595 pmap_switch_internal(
4596 pmap_t pmap)
4597 {
4598 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
4599 uint32_t last_asid_high_bits, asid_high_bits;
4600 pmap_t cur_pmap;
4601 pmap_t cur_user_pmap;
4602 boolean_t do_asid_flush = FALSE;
4603
4604 #if (__ARM_VMSA__ == 7)
4605 if (not_in_kdp)
4606 simple_lock(&pmap->tt1_lock);
4607 #endif
4608
4609 cur_pmap = current_pmap();
4610 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
4611
4612 /* Paranoia. */
4613 assert(pmap->asid < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));
4614
4615 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
4616 asid_high_bits = pmap->vasid >> ARM_ASID_SHIFT;
4617 last_asid_high_bits = (uint32_t) cpu_data_ptr->cpu_asid_high_bits[pmap->asid];
4618
4619 if (asid_high_bits != last_asid_high_bits) {
4620 /*
4621 * If the virtual ASID of the new pmap does not match the virtual ASID
4622 * last seen on this CPU for the physical ASID (that was a mouthful),
4623 * then this switch runs the risk of aliasing. We need to flush the
4624 * TLB for this physical ASID in this case.
4625 */
4626 cpu_data_ptr->cpu_asid_high_bits[pmap->asid] = (uint8_t) asid_high_bits;
4627 do_asid_flush = TRUE;
4628 }
4629
4630 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
4631 if (cpu_data_ptr->cpu_user_pmap_stamp == pmap->stamp) {
4632 pmap_switch_user_ttb_internal(pmap);
4633
4634 #if (__ARM_VMSA__ == 7)
4635 if (not_in_kdp)
4636 simple_unlock(&pmap->tt1_lock);
4637 #endif
4638
4639 if (do_asid_flush) {
4640 pmap_flush_core_tlb_asid(pmap);
4641 }
4642
4643 return;
4644 } else
4645 cur_user_pmap = NULL;
4646 } else if ((cur_user_pmap == pmap) && (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp))
4647 cur_user_pmap = NULL;
4648
4649 pmap_switch_user_ttb_internal(pmap);
4650
4651 if (do_asid_flush) {
4652 pmap_flush_core_tlb_asid(pmap);
4653 }
4654
4655 #if (__ARM_VMSA__ == 7)
4656 if (not_in_kdp)
4657 simple_unlock(&pmap->tt1_lock);
4658 #else
4659 if (pmap != kernel_pmap) {
4660
4661 if (cur_user_pmap != PMAP_NULL) {
4662 /*
4663 * We have a low-address global mapping for the commpage
4664 * for 32-bit processes; flush it if we switch to a 64-bit
4665 * process.
4666 */
4667 if (pmap_is_64bit(pmap) && !pmap_is_64bit(cur_user_pmap)) {
4668 pmap_sharedpage_flush_32_to_64();
4669 }
4670
4671 } else
4672 flush_core_tlb();
4673 }
4674 #endif
4675 }
4676
4677 void
4678 pmap_switch(
4679 pmap_t pmap)
4680 {
4681 pmap_switch_internal(pmap);
4682 }
4683
4684 void
4685 pmap_page_protect(
4686 ppnum_t ppnum,
4687 vm_prot_t prot)
4688 {
4689 pmap_page_protect_options(ppnum, prot, 0, NULL);
4690 }
4691
4692 /*
4693 * Routine: pmap_page_protect_options
4694 *
4695 * Function:
4696 * Lower the permission for all mappings to a given
4697 * page.
4698 */
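/*
 * The internal routine below walks the PV list for the physical page.  For
 * a downgrade to read (and/or execute) each mapping is rewritten read-only;
 * for anything weaker every mapping is removed, optionally leaving an
 * ARM_PTE_COMPRESSED marker behind when PMAP_OPTIONS_COMPRESSOR is set,
 * with the stats and ledgers adjusted to match.
 */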
4699 static void
4700 pmap_page_protect_options_internal(
4701 ppnum_t ppnum,
4702 vm_prot_t prot,
4703 unsigned int options)
4704 {
4705 pmap_paddr_t phys = ptoa(ppnum);
4706 pv_entry_t **pv_h;
4707 pv_entry_t *pve_p;
4708 pv_entry_t *pveh_p;
4709 pv_entry_t *pvet_p;
4710 pt_entry_t *pte_p;
4711 int pai;
4712 boolean_t remove;
4713 boolean_t set_NX;
4714 unsigned int pvh_cnt = 0;
4715
4716 assert(ppnum != vm_page_fictitious_addr);
4717
4718 /* Only work with managed pages. */
4719 if (!pa_valid(phys)) {
4720 return;
4721 }
4722
4723 /*
4724 * Determine the new protection.
4725 */
4726 switch (prot) {
4727 case VM_PROT_ALL:
4728 return; /* nothing to do */
4729 case VM_PROT_READ:
4730 case VM_PROT_READ | VM_PROT_EXECUTE:
4731 remove = FALSE;
4732 break;
4733 default:
4734 remove = TRUE;
4735 break;
4736 }
4737
4738 pai = (int)pa_index(phys);
4739 LOCK_PVH(pai);
4740 pv_h = pai_to_pvh(pai);
4741
4742 pte_p = PT_ENTRY_NULL;
4743 pve_p = PV_ENTRY_NULL;
4744 pveh_p = PV_ENTRY_NULL;
4745 pvet_p = PV_ENTRY_NULL;
4746 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
4747 pte_p = pvh_ptep(pv_h);
4748 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
4749 pve_p = pvh_list(pv_h);
4750 pveh_p = pve_p;
4751 }
4752
4753 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
4754 vm_map_address_t va;
4755 pmap_t pmap;
4756 pt_entry_t tmplate;
4757 boolean_t update = FALSE;
4758
4759 if (pve_p != PV_ENTRY_NULL)
4760 pte_p = pve_get_ptep(pve_p);
4761
4762 pmap = ptep_get_pmap(pte_p);
4763 va = ptep_get_va(pte_p);
4764
4765 if (pte_p == PT_ENTRY_NULL) {
4766 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
4767 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
4768 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
4769 #if MACH_ASSERT
4770 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
4771
4772 pv_entry_t *check_pve_p = pveh_p;
4773 while (check_pve_p != PV_ENTRY_NULL) {
4774 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
4775 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
4776 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
4777 }
4778 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
4779 }
4780 }
4781 #endif
4782 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
4783 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
4784 }
4785
4786 #if DEVELOPMENT || DEBUG
4787 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
4788 #else
4789 if ((prot & VM_PROT_EXECUTE))
4790 #endif
4791 set_NX = FALSE;
4792 else
4793 set_NX = TRUE;
4794
4795 /* Remove the mapping if new protection is NONE */
4796 if (remove) {
4797 boolean_t is_altacct = FALSE;
4798
4799 if (IS_ALTACCT_PAGE(pai, pve_p)) {
4800 is_altacct = TRUE;
4801 } else {
4802 is_altacct = FALSE;
4803 }
4804
4805 if (pte_is_wired(*pte_p)) {
4806 pte_set_wired(pte_p, 0);
4807 if (pmap != kernel_pmap) {
4808 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
4809 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
4810 }
4811 }
4812
4813 if (*pte_p != ARM_PTE_TYPE_FAULT &&
4814 pmap != kernel_pmap &&
4815 (options & PMAP_OPTIONS_COMPRESSOR) &&
4816 IS_INTERNAL_PAGE(pai)) {
4817 assert(!ARM_PTE_IS_COMPRESSED(*pte_p));
4818 /* mark this PTE as having been "compressed" */
4819 tmplate = ARM_PTE_COMPRESSED;
4820 if (is_altacct) {
4821 tmplate |= ARM_PTE_COMPRESSED_ALT;
4822 is_altacct = TRUE;
4823 }
4824 } else {
4825 tmplate = ARM_PTE_TYPE_FAULT;
4826 }
4827
4828 if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
4829 tmplate == ARM_PTE_TYPE_FAULT &&
4830 (pmap != kernel_pmap)) {
4831 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0)
4832 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
4833 }
4834
4835 if (*pte_p != tmplate) {
4836 WRITE_PTE(pte_p, tmplate);
4837 update = TRUE;
4838 }
4839 pvh_cnt++;
4840 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
4841 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
4842
4843 #if MACH_ASSERT
4844 /*
4845 * We only ever compress internal pages.
4846 */
4847 if (options & PMAP_OPTIONS_COMPRESSOR) {
4848 assert(IS_INTERNAL_PAGE(pai));
4849 }
4850 #endif
4851
4852 if (pmap != kernel_pmap) {
4853 if (IS_REUSABLE_PAGE(pai) &&
4854 IS_INTERNAL_PAGE(pai) &&
4855 !is_altacct) {
4856 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
4857 OSAddAtomic(-1, &pmap->stats.reusable);
4858 } else if (IS_INTERNAL_PAGE(pai)) {
4859 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
4860 OSAddAtomic(-1, &pmap->stats.internal);
4861 } else {
4862 PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
4863 OSAddAtomic(-1, &pmap->stats.external);
4864 }
4865 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
4866 IS_INTERNAL_PAGE(pai)) {
4867 /* adjust "compressed" stats */
4868 OSAddAtomic64(+1, &pmap->stats.compressed);
4869 PMAP_STATS_PEAK(pmap->stats.compressed);
4870 pmap->stats.compressed_lifetime++;
4871 }
4872
4873 if (IS_ALTACCT_PAGE(pai, pve_p)) {
4874 assert(IS_INTERNAL_PAGE(pai));
4875 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
4876 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
4877 if (options & PMAP_OPTIONS_COMPRESSOR) {
4878 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
4879 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
4880 }
4881
4882 /*
4883 * Cleanup our marker before
4884 * we free this pv_entry.
4885 */
4886 CLR_ALTACCT_PAGE(pai, pve_p);
4887
4888 } else if (IS_REUSABLE_PAGE(pai)) {
4889 assert(IS_INTERNAL_PAGE(pai));
4890 if (options & PMAP_OPTIONS_COMPRESSOR) {
4891 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
4892 /* was not in footprint, but is now */
4893 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
4894 }
4895
4896 } else if (IS_INTERNAL_PAGE(pai)) {
4897 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
4898
4899 /*
4900 * Update all stats related to physical footprint, which only
4901 * deals with internal pages.
4902 */
4903 if (options & PMAP_OPTIONS_COMPRESSOR) {
4904 /*
4905 * This removal is only being done so we can send this page to
4906 * the compressor; therefore it mustn't affect total task footprint.
4907 */
4908 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
4909 } else {
4910 /*
4911 * This internal page isn't going to the compressor, so adjust stats to keep
4912 * phys_footprint up to date.
4913 */
4914 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
4915 }
4916 } else {
4917 /* external page: no impact on ledgers */
4918 }
4919 }
4920
4921 if (pve_p != PV_ENTRY_NULL) {
4922 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
4923 }
4924
4925 } else {
4926 pt_entry_t spte;
4927
4928 spte = *pte_p;
4929
4930 if (pmap == kernel_pmap)
4931 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
4932 else
4933 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
4934
4935 pte_set_ffr(tmplate, 0);
4936
4937 #if (__ARM_VMSA__ == 7)
4938 if (set_NX) {
4939 tmplate |= ARM_PTE_NX;
4940 } else {
4941 /*
4942 * While the naive implementation of this would serve to add execute
4943 * permission, this is not how the VM uses this interface, or how
4944 * x86_64 implements it. So ignore requests to add execute permissions.
4945 */
4946 #if 0
4947 tmplate &= ~ARM_PTE_NX;
4948 #else
4949 ;
4950 #endif
4951 }
4952 #else
4953 if (set_NX)
4954 tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
4955 else {
4956 /*
4957 * While the naive implementation of this would serve to add execute
4958 * permission, this is not how the VM uses this interface, or how
4959 * x86_64 implements it. So ignore requests to add execute permissions.
4960 */
4961 #if 0
4962 if (pmap == kernel_pmap) {
4963 tmplate &= ~ARM_PTE_PNX;
4964 tmplate |= ARM_PTE_NX;
4965 } else {
4966 tmplate &= ~ARM_PTE_NX;
4967 tmplate |= ARM_PTE_PNX;
4968 }
4969 #else
4970 ;
4971 #endif
4972 }
4973 #endif
4974
4975
4976 if (*pte_p != ARM_PTE_TYPE_FAULT &&
4977 !ARM_PTE_IS_COMPRESSED(*pte_p) &&
4978 *pte_p != tmplate) {
4979 WRITE_PTE(pte_p, tmplate);
4980 update = TRUE;
4981 }
4982 }
4983
4984 /* Invalidate TLBs for all CPUs using it */
4985 if (update)
4986 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
4987
4988 pte_p = PT_ENTRY_NULL;
4989 pvet_p = pve_p;
4990 if (pve_p != PV_ENTRY_NULL) {
4991 pvet_p = pve_p;
4992 if (remove) {
4993 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
4994 }
4995 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4996 }
4997 }
4998
4999 /* if we removed a bunch of entries, take care of them now */
5000 if (remove) {
5001 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
5002 }
5003
5004 UNLOCK_PVH(pai);
5005
5006 if (remove && (pveh_p != PV_ENTRY_NULL)) {
5007 pv_list_free(pveh_p, pvet_p, pvh_cnt);
5008 }
5009 }
5010
5011 void
5012 pmap_page_protect_options(
5013 ppnum_t ppnum,
5014 vm_prot_t prot,
5015 unsigned int options,
5016 __unused void *arg)
5017 {
5018 pmap_paddr_t phys = ptoa(ppnum);
5019
5020 assert(ppnum != vm_page_fictitious_addr);
5021
5022 /* Only work with managed pages. */
5023 if (!pa_valid(phys))
5024 return;
5025
5026 /*
5027 * Determine the new protection.
5028 */
5029 if (prot == VM_PROT_ALL) {
5030 return; /* nothing to do */
5031 }
5032
5033 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
5034
5035 pmap_page_protect_options_internal(ppnum, prot, options);
5036
5037 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
5038 }
5039
5040 /*
5041 * Indicates if the pmap layer enforces some additional restrictions on the
5042 * given set of protections.
5043 */
5044 bool pmap_has_prot_policy(__unused vm_prot_t prot)
5045 {
5046 return FALSE;
5047 }
5048
5049 /*
5050 * Set the physical protection on the
5051 * specified range of this map as requested.
5052 * VERY IMPORTANT: Will not increase permissions.
5053 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
5054 */
5055 void
5056 pmap_protect(
5057 pmap_t pmap,
5058 vm_map_address_t b,
5059 vm_map_address_t e,
5060 vm_prot_t prot)
5061 {
5062 pmap_protect_options(pmap, b, e, prot, 0, NULL);
5063 }
5064
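/*
 * Apply the protection change to the portion of [start, end) covered by a
 * single twig entry.  Unmapped and compressed PTEs are skipped, execute
 * permission is never added, and managed pages are updated under their PVH
 * lock before the PTE range is flushed and the TLBs are updated.
 */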
5065 static void
5066 pmap_protect_options_internal(pmap_t pmap,
5067 vm_map_address_t start,
5068 vm_map_address_t end,
5069 vm_prot_t prot,
5070 unsigned int options,
5071 __unused void *args)
5072 {
5073 tt_entry_t *tte_p;
5074 pt_entry_t *bpte_p, *epte_p;
5075 pt_entry_t *pte_p;
5076 boolean_t set_NX = TRUE;
5077 #if (__ARM_VMSA__ > 7)
5078 boolean_t set_XO = FALSE;
5079 #endif
5080 boolean_t should_have_removed = FALSE;
5081
5082 #ifndef __ARM_IC_NOALIAS_ICACHE__
5083 boolean_t InvalidatePoU_Icache_Done = FALSE;
5084 #endif
5085
5086 #if DEVELOPMENT || DEBUG
5087 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5088 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5089 should_have_removed = TRUE;
5090 }
5091 } else
5092 #endif
5093 {
5094 /* Determine the new protection. */
5095 switch (prot) {
5096 #if (__ARM_VMSA__ > 7)
5097 case VM_PROT_EXECUTE:
5098 set_XO = TRUE;
5099 /* fall through */
5100 #endif
5101 case VM_PROT_READ:
5102 case VM_PROT_READ | VM_PROT_EXECUTE:
5103 break;
5104 case VM_PROT_READ | VM_PROT_WRITE:
5105 case VM_PROT_ALL:
5106 return; /* nothing to do */
5107 default:
5108 should_have_removed = TRUE;
5109 }
5110 }
5111
5112 if (should_have_removed) {
5113 panic("%s: should have been a remove operation, "
5114 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
5115 __FUNCTION__,
5116 pmap, (void *)start, (void *)end, prot, options, args);
5117 }
5118
5119 #if DEVELOPMENT || DEBUG
5120 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5121 #else
5122 if ((prot & VM_PROT_EXECUTE))
5123 #endif
5124 {
5125 set_NX = FALSE;
5126 } else {
5127 set_NX = TRUE;
5128 }
5129
5130 PMAP_LOCK(pmap);
5131 tte_p = pmap_tte(pmap, start);
5132
5133 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5134 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
5135 bpte_p = &bpte_p[ptenum(start)];
5136 epte_p = bpte_p + arm_atop(end - start);
5137 pte_p = bpte_p;
5138
5139 for (pte_p = bpte_p;
5140 pte_p < epte_p;
5141 pte_p += PAGE_SIZE/ARM_PGBYTES) {
5142 pt_entry_t spte;
5143 #if DEVELOPMENT || DEBUG
5144 boolean_t force_write = FALSE;
5145 #endif
5146
5147 spte = *pte_p;
5148
5149 if ((spte == ARM_PTE_TYPE_FAULT) ||
5150 ARM_PTE_IS_COMPRESSED(spte)) {
5151 continue;
5152 }
5153
5154 pmap_paddr_t pa;
5155 int pai=0;
5156 boolean_t managed=FALSE;
5157
5158 while (!managed) {
5159 /*
5160 * It may be possible for the pte to transition from managed
5161 * to unmanaged in this timeframe; for now, elide the assert.
5162 * We should break out as a consequence of checking pa_valid.
5163 */
5164 // assert(!ARM_PTE_IS_COMPRESSED(spte));
5165 pa = pte_to_pa(spte);
5166 if (!pa_valid(pa))
5167 break;
5168 pai = (int)pa_index(pa);
5169 LOCK_PVH(pai);
5170 spte = *pte_p;
5171 pa = pte_to_pa(spte);
5172 if (pai == (int)pa_index(pa)) {
5173 managed = TRUE;
5174 break; // Leave the PVH locked as we will unlock it after we free the PTE
5175 }
5176 UNLOCK_PVH(pai);
5177 }
5178
5179 if ((spte == ARM_PTE_TYPE_FAULT) ||
5180 ARM_PTE_IS_COMPRESSED(spte)) {
5181 continue;
5182 }
5183
5184 pt_entry_t tmplate;
5185
5186 if (pmap == kernel_pmap) {
5187 #if DEVELOPMENT || DEBUG
5188 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5189 force_write = TRUE;
5190 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
5191 } else
5192 #endif
5193 {
5194 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5195 }
5196 } else {
5197 #if DEVELOPMENT || DEBUG
5198 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5199 force_write = TRUE;
5200 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWRW));
5201 } else
5202 #endif
5203 {
5204 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
5205 }
5206 }
5207
5208 /*
5209 * XXX Removing "NX" would
5210 * grant "execute" access
5211 * immediately, bypassing any
5212 * checks VM might want to do
5213 * in its soft fault path.
5214 * pmap_protect() and co. are
5215 * not allowed to increase
5216 * access permissions.
5217 */
5218 #if (__ARM_VMSA__ == 7)
5219 if (set_NX)
5220 tmplate |= ARM_PTE_NX;
5221 else {
5222 /* do NOT clear "NX"! */
5223 }
5224 #else
5225 if (set_NX)
5226 tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
5227 else {
5228 if (pmap == kernel_pmap) {
5229 /*
5230 * TODO: Run CS/Monitor checks here;
5231 * should we be clearing PNX here? Is
5232 * this just for dtrace?
5233 */
5234 tmplate &= ~ARM_PTE_PNX;
5235 tmplate |= ARM_PTE_NX;
5236 } else {
5237 /* do NOT clear "NX"! */
5238 tmplate |= ARM_PTE_PNX;
5239 if (set_XO) {
5240 tmplate &= ~ARM_PTE_APMASK;
5241 tmplate |= ARM_PTE_AP(AP_RONA);
5242 }
5243 }
5244 }
5245 #endif
5246
5247 #if DEVELOPMENT || DEBUG
5248 if (force_write) {
5249 /*
5250 * TODO: Run CS/Monitor checks here.
5251 */
5252 if (managed) {
5253 /*
5254 * We are marking the page as writable,
5255 * so we consider it to be modified and
5256 * referenced.
5257 */
5258 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
5259 tmplate |= ARM_PTE_AF;
5260
5261 if (IS_REFFAULT_PAGE(pai)) {
5262 CLR_REFFAULT_PAGE(pai);
5263 }
5264
5265 if (IS_MODFAULT_PAGE(pai)) {
5266 CLR_MODFAULT_PAGE(pai);
5267 }
5268 }
5269 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5270 /*
5271 * An immediate request for anything other than
5272 * write should still mark the page as
5273 * referenced if managed.
5274 */
5275 if (managed) {
5276 pa_set_bits(pa, PP_ATTR_REFERENCED);
5277 tmplate |= ARM_PTE_AF;
5278
5279 if (IS_REFFAULT_PAGE(pai)) {
5280 CLR_REFFAULT_PAGE(pai);
5281 }
5282 }
5283 }
5284 #endif
5285
5286 /* We do not expect to take a write fast-fault on this entry. */
5287 pte_set_ffr(tmplate, 0);
5288
5289 /* TODO: Doesn't this need to worry about PNX? */
5290 if (((spte & ARM_PTE_NX) == ARM_PTE_NX) && (prot & VM_PROT_EXECUTE)) {
5291 CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
5292 #ifdef __ARM_IC_NOALIAS_ICACHE__
5293 InvalidatePoU_IcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
5294 #else
5295 if (!InvalidatePoU_Icache_Done) {
5296 InvalidatePoU_Icache();
5297 InvalidatePoU_Icache_Done = TRUE;
5298 }
5299 #endif
5300 }
5301
5302 WRITE_PTE_FAST(pte_p, tmplate);
5303
5304 if (managed) {
5305 ASSERT_PVH_LOCKED(pai);
5306 UNLOCK_PVH(pai);
5307 }
5308 }
5309
5310 FLUSH_PTE_RANGE(bpte_p, epte_p);
5311 PMAP_UPDATE_TLBS(pmap, start, end);
5312 }
5313
5314 PMAP_UNLOCK(pmap);
5315 }
5316
5317 void
5318 pmap_protect_options(
5319 pmap_t pmap,
5320 vm_map_address_t b,
5321 vm_map_address_t e,
5322 vm_prot_t prot,
5323 unsigned int options,
5324 __unused void *args)
5325 {
5326 vm_map_address_t l, beg;
5327
5328 if ((b|e) & PAGE_MASK) {
5329 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
5330 pmap, (uint64_t)b, (uint64_t)e);
5331 }
5332
5333 #if DEVELOPMENT || DEBUG
5334 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5335 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5336 pmap_remove_options(pmap, b, e, options);
5337 return;
5338 }
5339 } else
5340 #endif
5341 {
5342 /* Determine the new protection. */
5343 switch (prot) {
5344 case VM_PROT_EXECUTE:
5345 case VM_PROT_READ:
5346 case VM_PROT_READ | VM_PROT_EXECUTE:
5347 break;
5348 case VM_PROT_READ | VM_PROT_WRITE:
5349 case VM_PROT_ALL:
5350 return; /* nothing to do */
5351 default:
5352 pmap_remove_options(pmap, b, e, options);
5353 return;
5354 }
5355 }
5356
5357 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
5358 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
5359 VM_KERNEL_ADDRHIDE(e));
5360
5361 beg = b;
5362
5363 while (beg < e) {
5364 l = ((beg + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);
5365
5366 if (l > e)
5367 l = e;
5368
5369 pmap_protect_options_internal(pmap, beg, l, prot, options, args);
5370
5371 beg = l;
5372 }
5373
5374 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
5375 }
5376
5377 /* Map a (possibly) autogenned block */
5378 kern_return_t
5379 pmap_map_block(
5380 pmap_t pmap,
5381 addr64_t va,
5382 ppnum_t pa,
5383 uint32_t size,
5384 vm_prot_t prot,
5385 int attr,
5386 __unused unsigned int flags)
5387 {
5388 kern_return_t kr;
5389 addr64_t original_va = va;
5390 uint32_t page;
5391
5392 for (page = 0; page < size; page++) {
5393 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
5394
5395 if (kr != KERN_SUCCESS) {
5396 /*
5397 * This will panic for now, as it is unclear that
5398 * removing the mappings is correct.
5399 */
5400 panic("%s: failed pmap_enter, "
5401 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
5402 __FUNCTION__,
5403 pmap, va, pa, size, prot, flags);
5404
5405 pmap_remove(pmap, original_va, va - original_va);
5406 return kr;
5407 }
5408
5409 va += PAGE_SIZE;
5410 pa++;
5411 }
5412
5413 return KERN_SUCCESS;
5414 }
5415
5416 /*
5417 * Insert the given physical page (p) at
5418 * the specified virtual address (v) in the
5419 * target physical map with the protection requested.
5420 *
5421 * If specified, the page will be wired down, meaning
5422 * that the related pte can not be reclaimed.
5423 *
5424 * NB: This is the only routine which MAY NOT lazy-evaluate
5425 * or lose information. That is, this routine must actually
5426 * insert this page into the given map eventually (must make
5427 * forward progress eventually).
5428 */
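/*
 * An illustrative invocation (not taken from a specific caller) that wires
 * a writable default-cacheability mapping would look like
 *
 *	pmap_enter(pmap, va, pn, VM_PROT_READ | VM_PROT_WRITE,
 *		   VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
 *
 * pmap_map_block() above uses the same pattern for each page of a block.
 */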
5429 kern_return_t
5430 pmap_enter(
5431 pmap_t pmap,
5432 vm_map_address_t v,
5433 ppnum_t pn,
5434 vm_prot_t prot,
5435 vm_prot_t fault_type,
5436 unsigned int flags,
5437 boolean_t wired)
5438 {
5439 return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
5440 }
5441
5442
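/*
 * Commit a new PTE value for "v", keeping the pmap's wired accounting in
 * sync when the ARM_PTE_WIRED bit changes (user pmaps only).  TLBs are
 * updated only when a previously valid, non-compressed entry is being
 * replaced; otherwise an instruction barrier alone is issued.
 */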
5443 static inline void pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
5444 {
5445 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED)))
5446 {
5447 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
5448 if (pte & ARM_PTE_WIRED) {
5449 OSAddAtomic16(1, ptd_wiredcnt_ptr);
5450 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5451 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
5452 } else {
5453 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
5454 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5455 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
5456 }
5457 }
5458 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5459 !ARM_PTE_IS_COMPRESSED(*pte_p)) {
5460 WRITE_PTE(pte_p, pte);
5461 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
5462 } else {
5463 WRITE_PTE(pte_p, pte);
5464 __asm__ volatile("isb");
5465 }
5466 }
5467
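/*
 * Translate VM_WIMG_* cache attributes into the matching ARM PTE attribute
 * index and shareability bits.  Device-type mappings (IO, posted and
 * write-combined) are additionally forced non-executable.
 */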
5468 static pt_entry_t
5469 wimg_to_pte(unsigned int wimg)
5470 {
5471 pt_entry_t pte;
5472
5473 switch (wimg & (VM_WIMG_MASK)) {
5474 case VM_WIMG_IO:
5475 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5476 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5477 break;
5478 case VM_WIMG_POSTED:
5479 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
5480 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5481 break;
5482 case VM_WIMG_WCOMB:
5483 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
5484 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5485 break;
5486 case VM_WIMG_WTHRU:
5487 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
5488 #if (__ARM_VMSA__ > 7)
5489 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5490 #else
5491 pte |= ARM_PTE_SH;
5492 #endif
5493 break;
5494 case VM_WIMG_COPYBACK:
5495 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
5496 #if (__ARM_VMSA__ > 7)
5497 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5498 #else
5499 pte |= ARM_PTE_SH;
5500 #endif
5501 break;
5502 case VM_WIMG_INNERWBACK:
5503 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
5504 #if (__ARM_VMSA__ > 7)
5505 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
5506 #else
5507 pte |= ARM_PTE_SH;
5508 #endif
5509 break;
5510 default:
5511 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
5512 #if (__ARM_VMSA__ > 7)
5513 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5514 #else
5515 pte |= ARM_PTE_SH;
5516 #endif
5517 }
5518
5519 return pte;
5520 }
5521
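/*
 * Core of pmap_enter(): expand the pmap if no PTE exists for "v", undo any
 * "compressed" marker or stale mapping to a different physical page, build
 * the new PTE (access permissions, XN/PNX, cache attributes, and the nG bit
 * for non-nested user pmaps), then record the mapping in the physical
 * page's PV list, retrying if another thread races us.
 */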
5522 static kern_return_t
5523 pmap_enter_options_internal(
5524 pmap_t pmap,
5525 vm_map_address_t v,
5526 ppnum_t pn,
5527 vm_prot_t prot,
5528 vm_prot_t fault_type,
5529 unsigned int flags,
5530 boolean_t wired,
5531 unsigned int options)
5532 {
5533 pmap_paddr_t pa = ptoa(pn);
5534 pt_entry_t pte;
5535 pt_entry_t spte;
5536 pt_entry_t *pte_p;
5537 pv_entry_t *pve_p;
5538 boolean_t set_NX;
5539 boolean_t set_XO = FALSE;
5540 boolean_t refcnt_updated;
5541 boolean_t wiredcnt_updated;
5542 unsigned int wimg_bits;
5543 boolean_t was_compressed, was_alt_compressed;
5544
5545 if ((v) & PAGE_MASK) {
5546 panic("pmap_enter_options() pmap %p v 0x%llx\n",
5547 pmap, (uint64_t)v);
5548 }
5549
5550 if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
5551 panic("pmap_enter_options(): WX request on kernel_pmap");
5552 }
5553
5554 #if DEVELOPMENT || DEBUG
5555 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5556 #else
5557 if ((prot & VM_PROT_EXECUTE))
5558 #endif
5559 set_NX = FALSE;
5560 else
5561 set_NX = TRUE;
5562
5563 #if (__ARM_VMSA__ > 7)
5564 if (prot == VM_PROT_EXECUTE) {
5565 set_XO = TRUE;
5566 }
5567 #endif
5568
5569 assert(pn != vm_page_fictitious_addr);
5570
5571 refcnt_updated = FALSE;
5572 wiredcnt_updated = FALSE;
5573 pve_p = PV_ENTRY_NULL;
5574 was_compressed = FALSE;
5575 was_alt_compressed = FALSE;
5576
5577 PMAP_LOCK(pmap);
5578
5579 /*
5580 * Expand pmap to include this pte. Assume that
5581 * pmap is always expanded to include enough hardware
5582 * pages to map one VM page.
5583 */
5584 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
5585 /* Must unlock to expand the pmap. */
5586 PMAP_UNLOCK(pmap);
5587
5588 kern_return_t kr=pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
5589
5590 if(kr) {
5591 return kr;
5592 }
5593
5594 PMAP_LOCK(pmap);
5595 }
5596
5597 if (options & PMAP_OPTIONS_NOENTER) {
5598 PMAP_UNLOCK(pmap);
5599 return KERN_SUCCESS;
5600 }
5601
5602 Pmap_enter_retry:
5603
5604 spte = *pte_p;
5605
5606 if (ARM_PTE_IS_COMPRESSED(spte)) {
5607 /*
5608 * "pmap" should be locked at this point, so this should
5609 * not race with another pmap_enter() or pmap_remove_range().
5610 */
5611 assert(pmap != kernel_pmap);
5612
5613 /* one less "compressed" */
5614 OSAddAtomic64(-1, &pmap->stats.compressed);
5615 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
5616 PAGE_SIZE);
5617
5618 was_compressed = TRUE;
5619 if (spte & ARM_PTE_COMPRESSED_ALT) {
5620 was_alt_compressed = TRUE;
5621 pmap_ledger_debit(
5622 pmap,
5623 task_ledgers.alternate_accounting_compressed,
5624 PAGE_SIZE);
5625 } else {
5626 /* was part of the footprint */
5627 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5628 }
5629
5630 /* clear "compressed" marker */
5631 /* XXX is it necessary since we're about to overwrite it? */
5632 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
5633 spte = ARM_PTE_TYPE_FAULT;
5634
5635 /*
5636 * We're replacing a "compressed" marker with a valid PTE,
5637 * so no change for "refcnt".
5638 */
5639 refcnt_updated = TRUE;
5640 }
5641
5642 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
5643 pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
5644 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
5645 }
5646
5647 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
5648
5649 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
5650 * wired memory statistics for user pmaps, but kernel PTEs are assumed
5651 * to be wired in nearly all cases. For VM layer functionality, the wired
5652 * count in vm_page_t is sufficient. */
5653 if (wired && pmap != kernel_pmap)
5654 pte |= ARM_PTE_WIRED;
5655
5656 #if (__ARM_VMSA__ == 7)
5657 if (set_NX)
5658 pte |= ARM_PTE_NX;
5659 #else
5660 if (set_NX)
5661 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5662 else {
5663 if (pmap == kernel_pmap) {
5664 pte |= ARM_PTE_NX;
5665 } else {
5666 pte |= ARM_PTE_PNX;
5667 }
5668 }
5669 #endif
5670
5671 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT)))
5672 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
5673 else
5674 wimg_bits = pmap_cache_attributes(pn);
5675
5676 pte |= wimg_to_pte(wimg_bits);
5677
5678 if (pmap == kernel_pmap) {
5679 #if __ARM_KERNEL_PROTECT__
5680 pte |= ARM_PTE_NG;
5681 #endif /* __ARM_KERNEL_PROTECT__ */
5682 if (prot & VM_PROT_WRITE) {
5683 pte |= ARM_PTE_AP(AP_RWNA);
5684 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
5685 } else {
5686 pte |= ARM_PTE_AP(AP_RONA);
5687 pa_set_bits(pa, PP_ATTR_REFERENCED);
5688 }
5689 #if (__ARM_VMSA__ == 7)
5690 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH))
5691 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
5692 #endif
5693 } else {
5694 if (!(pmap->nested)) {
5695 pte |= ARM_PTE_NG;
5696 } else if ((pmap->nested_region_asid_bitmap)
5697 && (v >= pmap->nested_region_subord_addr)
5698 && (v < (pmap->nested_region_subord_addr+pmap->nested_region_size))) {
5699
5700 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);
5701
5702 if ((pmap->nested_region_asid_bitmap)
5703 && testbit(index, (int *)pmap->nested_region_asid_bitmap))
5704 pte |= ARM_PTE_NG;
5705 }
5706 #if MACH_ASSERT
5707 if (pmap->nested_pmap != NULL) {
5708 vm_map_address_t nest_vaddr;
5709 pt_entry_t *nest_pte_p;
5710
5711 nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;
5712
5713 if ((nest_vaddr >= pmap->nested_region_subord_addr)
5714 && (nest_vaddr < (pmap->nested_region_subord_addr+pmap->nested_region_size))
5715 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
5716 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
5717 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p))
5718 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
5719 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);
5720
5721 if ((pmap->nested_pmap->nested_region_asid_bitmap)
5722 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
5723
5724 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
5725 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
5726 }
5727 }
5728
5729 }
5730 #endif
5731 if (prot & VM_PROT_WRITE) {
5732
5733 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
5734 if (fault_type & VM_PROT_WRITE) {
5735 if (set_XO)
5736 pte |= ARM_PTE_AP(AP_RWNA);
5737 else
5738 pte |= ARM_PTE_AP(AP_RWRW);
5739 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
5740 } else {
5741 if (set_XO)
5742 pte |= ARM_PTE_AP(AP_RONA);
5743 else
5744 pte |= ARM_PTE_AP(AP_RORO);
5745 pa_set_bits(pa, PP_ATTR_REFERENCED);
5746 pte_set_ffr(pte, 1);
5747 }
5748 } else {
5749 if (set_XO)
5750 pte |= ARM_PTE_AP(AP_RWNA);
5751 else
5752 pte |= ARM_PTE_AP(AP_RWRW);
5753 pa_set_bits(pa, PP_ATTR_REFERENCED);
5754 }
5755 } else {
5756
5757 if (set_XO)
5758 pte |= ARM_PTE_AP(AP_RONA);
5759 else
5760 pte |= ARM_PTE_AP(AP_RORO);
5761 pa_set_bits(pa, PP_ATTR_REFERENCED);
5762 }
5763 }
5764
5765 pte |= ARM_PTE_AF;
5766
5767 volatile uint16_t *refcnt = NULL;
5768 volatile uint16_t *wiredcnt = NULL;
5769 if (pmap != kernel_pmap) {
5770 refcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
5771 wiredcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
5772 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
5773 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
5774 * a new PV entry. */
5775 if (!wiredcnt_updated) {
5776 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
5777 wiredcnt_updated = TRUE;
5778 }
5779 if (!refcnt_updated) {
5780 OSAddAtomic16(1, (volatile int16_t*)refcnt);
5781 refcnt_updated = TRUE;
5782 }
5783 }
5784
5785 if (pa_valid(pa)) {
5786 pv_entry_t **pv_h;
5787 int pai;
5788 boolean_t is_altacct, is_internal;
5789
5790 is_internal = FALSE;
5791 is_altacct = FALSE;
5792
5793 pai = (int)pa_index(pa);
5794 pv_h = pai_to_pvh(pai);
5795
5796 LOCK_PVH(pai);
5797 Pmap_enter_loop:
5798
5799 if (pte == *pte_p) {
5800 /*
5801 * This pmap_enter operation has been completed by another thread;
5802 * undo the refcnt on the page table and return.
5803 */
5804 if (refcnt != NULL) {
5805 assert(refcnt_updated);
5806 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0)
5807 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
5808 }
5809 UNLOCK_PVH(pai);
5810 goto Pmap_enter_return;
5811 } else if (pte_to_pa(*pte_p) == pa) {
5812 if (refcnt != NULL) {
5813 assert(refcnt_updated);
5814 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0)
5815 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
5816 }
5817 pmap_enter_pte(pmap, pte_p, pte, v);
5818 UNLOCK_PVH(pai);
5819 goto Pmap_enter_return;
5820 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
5821 /*
5822 * The pte has been modified by another thread;
5823 * hold the refcnt on the page table and retry the pmap_enter operation.
5824 */
5825 UNLOCK_PVH(pai);
5826 goto Pmap_enter_retry;
5827 }
5828 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
5829 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
5830 /* 1st mapping: see what kind of page it is */
5831 if (options & PMAP_OPTIONS_INTERNAL) {
5832 SET_INTERNAL_PAGE(pai);
5833 } else {
5834 CLR_INTERNAL_PAGE(pai);
5835 }
5836 if ((options & PMAP_OPTIONS_INTERNAL) &&
5837 (options & PMAP_OPTIONS_REUSABLE)) {
5838 SET_REUSABLE_PAGE(pai);
5839 } else {
5840 CLR_REUSABLE_PAGE(pai);
5841 }
5842 if (pmap != kernel_pmap &&
5843 ((options & PMAP_OPTIONS_ALT_ACCT) ||
5844 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
5845 IS_INTERNAL_PAGE(pai)) {
5846 /*
5847 * Make a note to ourselves that this mapping is using alternative
5848 * accounting. We'll need this in order to know which ledger to
5849 * debit when the mapping is removed.
5850 *
5851 * The altacct bit must be set while the pv head is locked. Defer
5852 * the ledger accounting until after we've dropped the lock.
5853 */
5854 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5855 is_altacct = TRUE;
5856 } else {
5857 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5858 }
5859 } else {
5860 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
5861 pt_entry_t *pte1_p;
5862
5863 /*
5864 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
5865 */
5866 pte1_p = pvh_ptep(pv_h);
5867 if((pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, &pve_p))) {
5868 goto Pmap_enter_loop;
5869 }
5870 pve_set_ptep(pve_p, pte1_p);
5871 pve_p->pve_next = PV_ENTRY_NULL;
5872
5873 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
5874 /*
5875 * transfer "altacct" from
5876 * pp_attr to this pve
5877 */
5878 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
5879 SET_ALTACCT_PAGE(pai, pve_p);
5880 }
5881 pvh_update_head(pv_h, pve_p, PVH_TYPE_PVEP);
5882 pve_p = PV_ENTRY_NULL;
5883 }
5884 /*
5885 * Set up pv_entry for this new mapping and then
5886 * add it to the list for this physical page.
5887 */
5888 if((pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, &pve_p))) {
5889 goto Pmap_enter_loop;
5890 }
5891 pve_set_ptep(pve_p, pte_p);
5892 pve_p->pve_next = PV_ENTRY_NULL;
5893
5894 pvh_add(pv_h, pve_p);
5895
5896 if (pmap != kernel_pmap &&
5897 ((options & PMAP_OPTIONS_ALT_ACCT) ||
5898 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
5899 IS_INTERNAL_PAGE(pai)) {
5900 /*
5901 * Make a note to ourselves that this
5902 * mapping is using alternative
5903 * accounting. We'll need this in order
5904 * to know which ledger to debit when
5905 * the mapping is removed.
5906 *
5907 * The altacct bit must be set while
5908 * the pv head is locked. Defer the
5909 * ledger accounting until after we've
5910 * dropped the lock.
5911 */
5912 SET_ALTACCT_PAGE(pai, pve_p);
5913 is_altacct = TRUE;
5914 }
5915
5916 pve_p = PV_ENTRY_NULL;
5917 }
5918
5919 pmap_enter_pte(pmap, pte_p, pte, v);
5920
5921 if (pmap != kernel_pmap) {
5922 if (IS_REUSABLE_PAGE(pai) &&
5923 !is_altacct) {
5924 assert(IS_INTERNAL_PAGE(pai));
5925 OSAddAtomic(+1, &pmap->stats.reusable);
5926 PMAP_STATS_PEAK(pmap->stats.reusable);
5927 } else if (IS_INTERNAL_PAGE(pai)) {
5928 OSAddAtomic(+1, &pmap->stats.internal);
5929 PMAP_STATS_PEAK(pmap->stats.internal);
5930 is_internal = TRUE;
5931 } else {
5932 OSAddAtomic(+1, &pmap->stats.external);
5933 PMAP_STATS_PEAK(pmap->stats.external);
5934 }
5935 }
5936
5937 UNLOCK_PVH(pai);
5938
5939 if (pmap != kernel_pmap) {
5940 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
5941
5942 if (is_internal) {
5943 /*
5944 * Make corresponding adjustments to
5945 * phys_footprint statistics.
5946 */
5947 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
5948 if (is_altacct) {
5949 /*
5950 * If this page is internal and
5951 * in an IOKit region, credit
5952 * the task's total count of
5953 * dirty, internal IOKit pages.
5954 * It should *not* count towards
5955 * the task's total physical
5956 * memory footprint, because
5957 * this entire region was
5958 * already billed to the task
5959 * at the time the mapping was
5960 * created.
5961 *
5962 * Put another way, this is
5963 * internal++ and
5964 * alternate_accounting++, so
5965 * net effect on phys_footprint
5966 * is 0. That means: don't
5967 * touch phys_footprint here.
5968 */
5969 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
5970 } else {
5971 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5972 }
5973 }
5974 }
5975
5976 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
5977 if (pmap->stats.resident_count > pmap->stats.resident_max)
5978 pmap->stats.resident_max = pmap->stats.resident_count;
5979 } else {
5980 pmap_enter_pte(pmap, pte_p, pte, v);
5981 }
5982
5983 Pmap_enter_return:
5984
5985 #if CONFIG_PGTRACE
5986 if (pgtrace_enabled) {
5987 // Clone and invalidate original mapping if eligible
5988 for (int i = 0; i < PAGE_RATIO; i++) {
5989 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES*i, 0, 0);
5990 }
5991 }
5992 #endif
5993
5994 if (pve_p != PV_ENTRY_NULL)
5995 pv_free(pve_p);
5996
5997 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0))
5998 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
5999
6000 PMAP_UNLOCK(pmap);
6001
6002 return KERN_SUCCESS;
6003 }
6004
6005 kern_return_t
6006 pmap_enter_options(
6007 pmap_t pmap,
6008 vm_map_address_t v,
6009 ppnum_t pn,
6010 vm_prot_t prot,
6011 vm_prot_t fault_type,
6012 unsigned int flags,
6013 boolean_t wired,
6014 unsigned int options,
6015 __unused void *arg)
6016 {
6017 kern_return_t kr = KERN_FAILURE;
6018
6019 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
6020 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
6021
6022 kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
6023
6024 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
6025
6026 return kr;
6027 }
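/*
 * A minimal sketch of a call into pmap_enter_options(); the map, address and
 * page number here are hypothetical, and a real caller (e.g. the VM fault
 * path) supplies its own fault_type, wimg flags and options:
 *
 *	kern_return_t kr;
 *
 *	kr = pmap_enter_options(map->pmap, vaddr, pn,
 *	                        VM_PROT_READ | VM_PROT_WRITE, VM_PROT_WRITE,
 *	                        0, FALSE, 0, NULL);
 *	if (kr != KERN_SUCCESS)
 *		... back off and retry, or report the failure ...
 */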
6028
6029 /*
6030 * Routine: pmap_change_wiring
6031 * Function: Change the wiring attribute for a map/virtual-address
6032 * pair.
6033 * In/out conditions:
6034 * The mapping must already exist in the pmap.
6035 */
6036 static void
6037 pmap_change_wiring_internal(
6038 pmap_t pmap,
6039 vm_map_address_t v,
6040 boolean_t wired)
6041 {
6042 pt_entry_t *pte_p;
6043 pmap_paddr_t pa;
6044
6045 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6046 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6047 * to be wired in nearly all cases. For VM layer functionality, the wired
6048 * count in vm_page_t is sufficient. */
6049 if (pmap == kernel_pmap) {
6050 return;
6051 }
6052
6053 PMAP_LOCK(pmap);
6054 pte_p = pmap_pte(pmap, v);
6055 assert(pte_p != PT_ENTRY_NULL);
6056 pa = pte_to_pa(*pte_p);
6057 if (pa_valid(pa))
6058 LOCK_PVH((int)pa_index(pa));
6059
6060 if (wired && !pte_is_wired(*pte_p)) {
6061 pte_set_wired(pte_p, wired);
6062 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
6063 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6064 } else if (!wired && pte_is_wired(*pte_p)) {
6065 PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
6066 pte_set_wired(pte_p, wired);
6067 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
6068 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6069 }
6070
6071 if (pa_valid(pa))
6072 UNLOCK_PVH((int)pa_index(pa));
6073
6074 PMAP_UNLOCK(pmap);
6075 }
6076
6077 void
6078 pmap_change_wiring(
6079 pmap_t pmap,
6080 vm_map_address_t v,
6081 boolean_t wired)
6082 {
6083 pmap_change_wiring_internal(pmap, v, wired);
6084 }
6085
6086 static ppnum_t
6087 pmap_find_phys_internal(
6088 pmap_t pmap,
6089 addr64_t va)
6090 {
6091 ppnum_t ppn=0;
6092
6093 if (pmap != kernel_pmap) {
6094 PMAP_LOCK(pmap);
6095 }
6096
6097 ppn = pmap_vtophys(pmap, va);
6098
6099 if (pmap != kernel_pmap) {
6100 PMAP_UNLOCK(pmap);
6101 }
6102
6103 return ppn;
6104 }
6105
6106 ppnum_t
6107 pmap_find_phys(
6108 pmap_t pmap,
6109 addr64_t va)
6110 {
6111 pmap_paddr_t pa=0;
6112
6113 if (pmap == kernel_pmap)
6114 pa = mmu_kvtop(va);
6115 else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))
6116 pa = mmu_uvtop(va);
6117
6118 if (pa) return (ppnum_t)(pa >> PAGE_SHIFT);
6119
6120 if (not_in_kdp) {
6121 return pmap_find_phys_internal(pmap, va);
6122 } else {
6123 return pmap_vtophys(pmap, va);
6124 }
6125 }
6126
6127 pmap_paddr_t
6128 kvtophys(
6129 vm_offset_t va)
6130 {
6131 pmap_paddr_t pa;
6132
6133 pa = mmu_kvtop(va);
6134 if (pa) return pa;
6135 pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
6136 if (pa)
6137 pa |= (va & PAGE_MASK);
6138
6139 return ((pmap_paddr_t)pa);
6140 }
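/*
 * Worked example of the composition above (hypothetical values, assuming
 * PAGE_SHIFT == 12): if pmap_vtophys() returns ppn 0x12345 for a kernel VA
 * whose in-page offset is 0xabc, then
 *
 *	pa = ((pmap_paddr_t)0x12345 << 12) | 0xabc == 0x12345abc
 *
 * i.e. the page's base physical address OR'd with the offset within the page.
 */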
6141
6142 ppnum_t
6143 pmap_vtophys(
6144 pmap_t pmap,
6145 addr64_t va)
6146 {
6147 if ((va < pmap->min) || (va >= pmap->max)) {
6148 return 0;
6149 }
6150
6151 #if (__ARM_VMSA__ == 7)
6152 tt_entry_t *tte_p, tte;
6153 pt_entry_t *pte_p;
6154 ppnum_t ppn;
6155
6156 tte_p = pmap_tte(pmap, va);
6157 if (tte_p == (tt_entry_t *) NULL)
6158 return (ppnum_t) 0;
6159
6160 tte = *tte_p;
6161 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6162 pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
6163 ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
6164 #if DEVELOPMENT || DEBUG
6165 if (ppn != 0 &&
6166 ARM_PTE_IS_COMPRESSED(*pte_p)) {
6167 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
6168 pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
6169 }
6170 #endif /* DEVELOPMENT || DEBUG */
6171 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK)
6172 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER)
6173 ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
6174 else
6175 ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
6176 else
6177 ppn = 0;
6178 #else
6179 tt_entry_t *ttp;
6180 tt_entry_t tte;
6181 ppnum_t ppn=0;
6182
6183 /* Level 0 currently unused */
6184
6185 #if __ARM64_TWO_LEVEL_PMAP__
6186 /* We have no L1 entry; go straight to the L2 entry */
6187 ttp = pmap_tt2e(pmap, va);
6188 tte = *ttp;
6189 #else
6190 /* Get first-level (1GB) entry */
6191 ttp = pmap_tt1e(pmap, va);
6192 tte = *ttp;
6193 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID))
6194 return (ppn);
6195
6196 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, va)];
6197 #endif
6198 if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID))
6199 return (ppn);
6200
6201 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6202 ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK)| (va & ARM_TT_L2_OFFMASK));
6203 return(ppn);
6204 }
6205 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, va)];
6206 ppn = (ppnum_t) atop((tte & ARM_PTE_MASK)| (va & ARM_TT_L3_OFFMASK));
6207 #endif
6208
6209 return ppn;
6210 }
6211
6212 static vm_offset_t
6213 pmap_extract_internal(
6214 pmap_t pmap,
6215 vm_map_address_t va)
6216 {
6217 pmap_paddr_t pa=0;
6218 ppnum_t ppn=0;
6219
6220 if (pmap == NULL) {
6221 return 0;
6222 }
6223
6224 PMAP_LOCK(pmap);
6225
6226 ppn = pmap_vtophys(pmap, va);
6227
6228 if (ppn != 0)
6229 pa = ptoa(ppn)| ((va) & PAGE_MASK);
6230
6231 PMAP_UNLOCK(pmap);
6232
6233 return pa;
6234 }
6235
6236 /*
6237 * Routine: pmap_extract
6238 * Function:
6239 * Extract the physical page address associated
6240 * with the given map/virtual_address pair.
6241 *
6242 */
6243 vm_offset_t
6244 pmap_extract(
6245 pmap_t pmap,
6246 vm_map_address_t va)
6247 {
6248 pmap_paddr_t pa=0;
6249
6250 if (pmap == kernel_pmap)
6251 pa = mmu_kvtop(va);
6252 else if (pmap == vm_map_pmap(current_thread()->map))
6253 pa = mmu_uvtop(va);
6254
6255 if (pa) return pa;
6256
6257 return pmap_extract_internal(pmap, va);
6258 }
6259
6260 /*
6261 * pmap_init_pte_page - Initialize a page table page.
6262 */
6263 void
6264 pmap_init_pte_page(
6265 pmap_t pmap,
6266 pt_entry_t *pte_p,
6267 vm_offset_t va,
6268 unsigned int ttlevel,
6269 boolean_t alloc_ptd)
6270 {
6271 pt_desc_t *ptdp;
6272
6273 ptdp = *(pt_desc_t **)pai_to_pvh(pa_index((((vm_offset_t)pte_p) - gVirtBase + gPhysBase)));
6274
6275 if (ptdp == NULL) {
6276 if (alloc_ptd) {
6277 /*
6278 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
6279 * on 4KB hardware, we may already have allocated a page table descriptor for a
6280 * bootstrap request, so we check for an existing PTD here.
6281 */
6282 ptdp = ptd_alloc(pmap);
6283 *(pt_desc_t **)pai_to_pvh(pa_index((((vm_offset_t)pte_p) - gVirtBase + gPhysBase))) = ptdp;
6284 } else {
6285 panic("pmap_init_pte_page(): pte_p %p\n", pte_p);
6286 }
6287 }
6288
6289 pmap_init_pte_page_internal(pmap, pte_p, va, ttlevel, &ptdp);
6290 }
6291
6292 /*
6293 * pmap_init_pte_page_internal - Initialize page table page and page table descriptor
6294 */
6295 void
6296 pmap_init_pte_page_internal(
6297 pmap_t pmap,
6298 pt_entry_t *pte_p,
6299 vm_offset_t va,
6300 unsigned int ttlevel,
6301 pt_desc_t **ptdp)
6302 {
6303 bzero(pte_p, ARM_PGBYTES);
6304 // The barrier below ensures the page zeroing is visible to the page table
6305 // walker (PTW) before this page is linked into the previous-level table entry.
6306 __asm__ volatile("DMB ST" : : : "memory");
6307 ptd_init(*ptdp, pmap, va, ttlevel, pte_p);
6308 }
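/*
 * The zero-then-publish ordering above pairs with the caller that links the
 * new table into the hierarchy; a sketch of that pattern, using the same
 * names pmap_expand() uses later in this file:
 *
 *	pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
 *	pa = kvtophys((vm_offset_t)tt_p);
 *	tte_p = pmap_tt1e(pmap, v);
 *	*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
 *
 * The DMB ST guarantees the bzero() is complete before the table walker can
 * observe the newly valid table entry.
 */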
6309
6310 /*
6311 * pmap_init_pte_static_page - for static mappings to a known contiguous range of pa's
6312 * Called from arm_vm_init().
6313 */
6314 void
6315 pmap_init_pte_static_page(
6316 __unused pmap_t pmap,
6317 pt_entry_t * pte_p,
6318 pmap_paddr_t pa)
6319 {
6320 #if (__ARM_VMSA__ == 7)
6321 unsigned int i;
6322 pt_entry_t *pte_cur;
6323
6324 for (i = 0, pte_cur = pte_p;
6325 i < (ARM_PGBYTES / sizeof(*pte_p));
6326 i++, pa += PAGE_SIZE) {
6327 if (pa >= avail_end) {
6328 /* We don't want to map memory xnu does not own through this routine. */
6329 break;
6330 }
6331
6332 *pte_cur = pa_to_pte(pa)
6333 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_SH | ARM_PTE_AP(AP_RONA)
6334 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
6335 pte_cur++;
6336 }
6337 #else
6338 unsigned int i;
6339 pt_entry_t *pte_cur;
6340 pt_entry_t template;
6341
6342 template = ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_AP(AP_RONA) | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | ARM_PTE_NX;
6343
6344 for (i = 0, pte_cur = pte_p;
6345 i < (ARM_PGBYTES / sizeof(*pte_p));
6346 i++, pa += PAGE_SIZE) {
6347 if (pa >= avail_end) {
6348 /* We don't want to map memory xnu does not own through this routine. */
6349 break;
6350 }
6351
6352 /* TEST_PAGE_RATIO_4 may be pre-processor defined to 0 */
6353 __unreachable_ok_push
6354 if (TEST_PAGE_RATIO_4) {
6355 *pte_cur = pa_to_pte(pa) | template;
6356 *(pte_cur+1) = pa_to_pte(pa+0x1000) | template;
6357 *(pte_cur+2) = pa_to_pte(pa+0x2000) | template;
6358 *(pte_cur+3) = pa_to_pte(pa+0x3000) | template;
6359 pte_cur += 4;
6360 } else {
6361 *pte_cur = pa_to_pte(pa) | template;
6362 pte_cur++;
6363 }
6364 __unreachable_ok_pop
6365 }
6366 #endif
6367 bzero(pte_cur, ARM_PGBYTES - ((vm_offset_t)pte_cur - (vm_offset_t)pte_p));
6368 }
6369
6370
6371 /*
6372 * Routine: pmap_expand
6373 *
6374 * Expands a pmap to be able to map the specified virtual address.
6375 *
6376 * Allocates new memory for the default (COARSE) translation table
6377 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
6378 * also allocates space for the corresponding pv entries.
6379 *
6380 * Nothing should be locked.
6381 */
6382 static kern_return_t
6383 pmap_expand(
6384 pmap_t pmap,
6385 vm_map_address_t v,
6386 unsigned int options,
6387 unsigned int level)
6388 {
6389 #if (__ARM_VMSA__ == 7)
6390 vm_offset_t pa;
6391 tt_entry_t *tte_p;
6392 tt_entry_t *tt_p;
6393 unsigned int i;
6394
6395
6396 while (tte_index(pmap, v) >= pmap->tte_index_max) {
6397 tte_p = pmap_tt1_allocate(pmap, 2*ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
6398 if (tte_p == (tt_entry_t *)0)
6399 return KERN_RESOURCE_SHORTAGE;
6400
6401 PMAP_LOCK(pmap);
6402 if (pmap->tte_index_max > NTTES) {
6403 pmap_tt1_deallocate(pmap, tte_p, 2*ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
6404 PMAP_UNLOCK(pmap);
6405 break;
6406 }
6407
6408 simple_lock(&pmap->tt1_lock);
6409 for (i = 0; i < pmap->tte_index_max; i++)
6410 tte_p[i] = pmap->tte[i];
6411 for (i = NTTES; i < 2*NTTES; i++)
6412 tte_p[i] = ARM_TTE_TYPE_FAULT;
6413
6414 pmap->prev_tte = pmap->tte;
6415 pmap->tte = tte_p;
6416 pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);
6417 #ifndef __ARM_L1_PTW__
6418 CleanPoU_DcacheRegion((vm_offset_t) pmap->tte, 2*NTTES * sizeof(tt_entry_t));
6419 #else
6420 __builtin_arm_dsb(DSB_ISH);
6421 #endif
6422 pmap->tte_index_max = 2*NTTES;
6423 pmap->stamp = hw_atomic_add(&pmap_stamp, 1);
6424
6425 for (i = 0; i < NTTES; i++)
6426 pmap->prev_tte[i] = ARM_TTE_TYPE_FAULT;
6427 #ifndef __ARM_L1_PTW__
6428 CleanPoU_DcacheRegion((vm_offset_t) pmap->prev_tte, NTTES * sizeof(tt_entry_t));
6429 #else
6430 __builtin_arm_dsb(DSB_ISH);
6431 #endif
6432
6433 simple_unlock(&pmap->tt1_lock);
6434 PMAP_UNLOCK(pmap);
6435 pmap_set_pmap(pmap, current_thread());
6436
6437 }
6438
6439 if (level == 1)
6440 return (KERN_SUCCESS);
6441
6442 {
6443 tt_entry_t *tte_next_p;
6444
6445 PMAP_LOCK(pmap);
6446 pa = 0;
6447 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
6448 PMAP_UNLOCK(pmap);
6449 return (KERN_SUCCESS);
6450 }
6451 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
6452 for (i = 0, tte_next_p = tte_p; i<4; i++) {
6453 if (tte_to_pa(*tte_next_p)) {
6454 pa = tte_to_pa(*tte_next_p);
6455 break;
6456 }
6457 tte_next_p++;
6458 }
6459 pa = pa & ~PAGE_MASK;
6460 if (pa) {
6461 tte_p = &pmap->tte[ttenum(v)];
6462 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
6463 #ifndef __ARM_L1_PTW__
6464 CleanPoU_DcacheRegion((vm_offset_t) tte_p, sizeof(tt_entry_t));
6465 #endif
6466 PMAP_UNLOCK(pmap);
6467 return (KERN_SUCCESS);
6468 }
6469 PMAP_UNLOCK(pmap);
6470 }
6471 v = v & ~ARM_TT_L1_PT_OFFMASK;
6472
6473
6474 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6475 /*
6476 * Allocate a VM page for the level 2 page table entries.
6477 */
6478 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6479 if(options & PMAP_OPTIONS_NOWAIT) {
6480 return KERN_RESOURCE_SHORTAGE;
6481 }
6482 VM_PAGE_WAIT();
6483 }
6484
6485 PMAP_LOCK(pmap);
6486 /*
6487 * See if someone else expanded us first
6488 */
6489 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6490 tt_entry_t *tte_next_p;
6491
6492 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
6493 pa = kvtophys((vm_offset_t)tt_p);
6494 #ifndef __ARM_L1_PTW__
6495 CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
6496 #endif
6497 tte_p = &pmap->tte[ttenum(v)];
6498 for (i = 0, tte_next_p = tte_p; i<4; i++) {
6499 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
6500 tte_next_p++;
6501 pa = pa +0x400;
6502 }
6503 #ifndef __ARM_L1_PTW__
6504 CleanPoU_DcacheRegion((vm_offset_t) tte_p, 4*sizeof(tt_entry_t));
6505 #endif
6506 pa = 0x0ULL;
6507 tt_p = (tt_entry_t *)NULL;
6508 }
6509 PMAP_UNLOCK(pmap);
6510 if (tt_p != (tt_entry_t *)NULL) {
6511 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
6512 tt_p = (tt_entry_t *)NULL;
6513 }
6514 }
6515 return (KERN_SUCCESS);
6516 #else
6517 pmap_paddr_t pa;
6518 #if __ARM64_TWO_LEVEL_PMAP__
6519 /* If we are using a two level page table, we'll start at L2. */
6520 unsigned int ttlevel = 2;
6521 #else
6522 /* Otherwise, we start at L1 (we use 3 levels by default). */
6523 unsigned int ttlevel = 1;
6524 #endif
6525 tt_entry_t *tte_p;
6526 tt_entry_t *tt_p;
6527
6528 pa = 0x0ULL;
6529 tt_p = (tt_entry_t *)NULL;
6530
6531 for (; ttlevel < level; ttlevel++) {
6532
6533 PMAP_LOCK(pmap);
6534
6535 if (ttlevel == 1) {
6536 if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
6537 PMAP_UNLOCK(pmap);
6538 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6539 if(options & PMAP_OPTIONS_NOWAIT) {
6540 return KERN_RESOURCE_SHORTAGE;
6541 }
6542 VM_PAGE_WAIT();
6543 }
6544 PMAP_LOCK(pmap);
6545 if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
6546 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
6547 pa = kvtophys((vm_offset_t)tt_p);
6548 tte_p = pmap_tt1e( pmap, v);
6549 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
6550 pa = 0x0ULL;
6551 tt_p = (tt_entry_t *)NULL;
6552 if ((pmap == kernel_pmap) && (VM_MIN_KERNEL_ADDRESS < 0x00000000FFFFFFFFULL))
6553 current_pmap()->tte[v>>ARM_TT_L1_SHIFT] = kernel_pmap->tte[v>>ARM_TT_L1_SHIFT];
6554 }
6555
6556 }
6557 } else if (ttlevel == 2) {
6558 if (pmap_tt3e(pmap, v) == PT_ENTRY_NULL) {
6559 PMAP_UNLOCK(pmap);
6560 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L3_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6561 if(options & PMAP_OPTIONS_NOWAIT) {
6562 return KERN_RESOURCE_SHORTAGE;
6563 }
6564 VM_PAGE_WAIT();
6565 }
6566 PMAP_LOCK(pmap);
6567 if ((pmap_tt3e(pmap, v) == PT_ENTRY_NULL)) {
6568 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v , PMAP_TT_L3_LEVEL, FALSE);
6569 pa = kvtophys((vm_offset_t)tt_p);
6570 tte_p = pmap_tt2e( pmap, v);
6571 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
6572 pa = 0x0ULL;
6573 tt_p = (tt_entry_t *)NULL;
6574 }
6575 }
6576 }
6577
6578 PMAP_UNLOCK(pmap);
6579
6580 if (tt_p != (tt_entry_t *)NULL) {
6581 pmap_tt_deallocate(pmap, tt_p, ttlevel+1);
6582 tt_p = (tt_entry_t *)NULL;
6583 }
6584 }
6585
6586 return (KERN_SUCCESS);
6587 #endif
6588 }
6589
6590 /*
6591 * Routine: pmap_collect
6592 * Function:
6593 * Garbage collects the physical map system for
6594 * pages which are no longer used.
6595 * Success need not be guaranteed -- that is, there
6596 * may well be pages which are not referenced, but
6597 * others may be collected.
6598 */
6599 void
6600 pmap_collect(pmap_t pmap)
6601 {
6602 if (pmap == PMAP_NULL)
6603 return;
6604
6605 #if 0
6606 PMAP_LOCK(pmap);
6607 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
6608 /* TODO: Scan for vm page assigned to top level page tables with no reference */
6609 }
6610 PMAP_UNLOCK(pmap);
6611 #endif
6612
6613 return;
6614 }
6615
6616 /*
6617 * Routine: pmap_gc
6618 * Function:
6619 * Pmap garbage collection
6620 * Called by the pageout daemon when pages are scarce.
6621 *
6622 */
6623 void
6624 pmap_gc(
6625 void)
6626 {
6627 pmap_t pmap, pmap_next;
6628 boolean_t gc_wait;
6629
6630 if (pmap_gc_allowed &&
6631 (pmap_gc_allowed_by_time_throttle ||
6632 pmap_gc_forced)) {
6633 pmap_gc_forced = FALSE;
6634 pmap_gc_allowed_by_time_throttle = FALSE;
6635 simple_lock(&pmaps_lock);
6636 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
6637 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
6638 if (!(pmap->gc_status & PMAP_GC_INFLIGHT))
6639 pmap->gc_status |= PMAP_GC_INFLIGHT;
6640 simple_unlock(&pmaps_lock);
6641
6642 pmap_collect(pmap);
6643
6644 simple_lock(&pmaps_lock);
6645 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
6646 pmap->gc_status &= ~(PMAP_GC_INFLIGHT|PMAP_GC_WAIT);
6647 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
6648 if (gc_wait) {
6649 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next))
6650 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
6651 simple_unlock(&pmaps_lock);
6652 thread_wakeup((event_t) & pmap->gc_status);
6653 simple_lock(&pmaps_lock);
6654 }
6655 pmap = pmap_next;
6656 }
6657 simple_unlock(&pmaps_lock);
6658 }
6659 }
6660
6661 /*
6662 * Called by the VM to reclaim pages that we can release quickly and cheaply.
6663 */
6664 void
6665 pmap_release_pages_fast(void)
6666 {
6667 }
6668
6669 /*
6670 * By default, don't attempt pmap GC more frequently
6671 * than once per minute.
6672 */
6673
6674 void
6675 compute_pmap_gc_throttle(
6676 void *arg __unused)
6677 {
6678 pmap_gc_allowed_by_time_throttle = TRUE;
6679 }
6680
6681 /*
6682 * pmap_attribute_cache_sync(vm_offset_t pa)
6683 *
6684 * Invalidates all of the instruction cache on a physical page and
6685 * pushes any dirty data from the data cache for the same physical page
6686 */
6687
6688 kern_return_t
6689 pmap_attribute_cache_sync(
6690 ppnum_t pp,
6691 vm_size_t size,
6692 __unused vm_machine_attribute_t attribute,
6693 __unused vm_machine_attribute_val_t * value)
6694 {
6695 if (size > PAGE_SIZE) {
6696 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
6697 } else
6698 cache_sync_page(pp);
6699
6700 return KERN_SUCCESS;
6701 }
6702
6703 /*
6704 * pmap_sync_page_data_phys(ppnum_t pp)
6705 *
6706 * Invalidates all of the instruction cache on a physical page and
6707 * pushes any dirty data from the data cache for the same physical page
6708 */
6709 void
6710 pmap_sync_page_data_phys(
6711 ppnum_t pp)
6712 {
6713 cache_sync_page(pp);
6714 }
6715
6716 /*
6717 * pmap_sync_page_attributes_phys(ppnum_t pp)
6718 *
6719 * Write back and invalidate all cachelines on a physical page.
6720 */
6721 void
6722 pmap_sync_page_attributes_phys(
6723 ppnum_t pp)
6724 {
6725 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
6726 }
6727
6728 #if CONFIG_COREDUMP
6729 /* temporary workaround */
6730 boolean_t
6731 coredumpok(
6732 vm_map_t map,
6733 vm_offset_t va)
6734 {
6735 pt_entry_t *pte_p;
6736 pt_entry_t spte;
6737
6738 pte_p = pmap_pte(map->pmap, va);
6739 if (0 == pte_p)
6740 return FALSE;
6741 spte = *pte_p;
6742 return ((spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT));
6743 }
6744 #endif
6745
6746 void
6747 fillPage(
6748 ppnum_t pn,
6749 unsigned int fill)
6750 {
6751 unsigned int *addr;
6752 int count;
6753
6754 addr = (unsigned int *) phystokv(ptoa(pn));
6755 count = PAGE_SIZE / sizeof(unsigned int);
6756 while (count--)
6757 *addr++ = fill;
6758 }
6759
6760 extern void mapping_set_mod(ppnum_t pn);
6761
6762 void
6763 mapping_set_mod(
6764 ppnum_t pn)
6765 {
6766 pmap_set_modify(pn);
6767 }
6768
6769 extern void mapping_set_ref(ppnum_t pn);
6770
6771 void
6772 mapping_set_ref(
6773 ppnum_t pn)
6774 {
6775 pmap_set_reference(pn);
6776 }
6777
6778 /*
6779 * Clear specified attribute bits.
6780 *
6781 * Try to force an arm_fast_fault() for all mappings of
6782 * the page - to force attributes to be set again at fault time.
6783 * If the forcing succeeds, clear the cached bits at the head.
6784 * Otherwise, something must have been wired, so leave the cached
6785 * attributes alone.
6786 */
6787 static void
6788 phys_attribute_clear_internal(
6789 ppnum_t pn,
6790 unsigned int bits,
6791 int options,
6792 void *arg)
6793 {
6794 pmap_paddr_t pa = ptoa(pn);
6795 vm_prot_t allow_mode = VM_PROT_ALL;
6796
6797
6798 if ((bits & PP_ATTR_MODIFIED) &&
6799 (options & PMAP_OPTIONS_NOFLUSH) &&
6800 (arg == NULL)) {
6801 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
6802 "should not clear 'modified' without flushing TLBs\n",
6803 pn, bits, options, arg);
6804 }
6805
6806 assert(pn != vm_page_fictitious_addr);
6807 if (bits & PP_ATTR_REFERENCED)
6808 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
6809 if (bits & PP_ATTR_MODIFIED)
6810 allow_mode &= ~VM_PROT_WRITE;
6811
6812 if (bits == PP_ATTR_NOENCRYPT) {
6813 /*
6814 * We short circuit this case; it should not need to
6815 * invoke arm_force_fast_fault, so just clear and
6816 * return. On ARM, this bit is just a debugging aid.
6817 */
6818 pa_clear_bits(pa, bits);
6819 return;
6820 }
6821
6822 if (arm_force_fast_fault_internal(pn, allow_mode, options))
6823 pa_clear_bits(pa, bits);
6824 return;
6825 }
6826
6827 static void
6828 phys_attribute_clear(
6829 ppnum_t pn,
6830 unsigned int bits,
6831 int options,
6832 void *arg)
6833 {
6834 /*
6835 * Do we really want this tracepoint? It will be extremely chatty.
6836 * Also, should we have a corresponding trace point for the set path?
6837 */
6838 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
6839
6840 phys_attribute_clear_internal(pn, bits, options, arg);
6841
6842 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
6843 }
6844
6845 /*
6846 * Set specified attribute bits.
6847 *
6848 * Set cached value in the pv head because we have
6849 * no per-mapping hardware support for referenced and
6850 * modify bits.
6851 */
6852 static void
6853 phys_attribute_set_internal(
6854 ppnum_t pn,
6855 unsigned int bits)
6856 {
6857 pmap_paddr_t pa = ptoa(pn);
6858 assert(pn != vm_page_fictitious_addr);
6859
6860
6861 pa_set_bits(pa, bits);
6862
6863 return;
6864 }
6865
6866 static void
6867 phys_attribute_set(
6868 ppnum_t pn,
6869 unsigned int bits)
6870 {
6871 phys_attribute_set_internal(pn, bits);
6872 }
6873
6874
6875 /*
6876 * Check specified attribute bits.
6877 *
6878 * use the software cached bits (since no hw support).
6879 */
6880 static boolean_t
6881 phys_attribute_test(
6882 ppnum_t pn,
6883 unsigned int bits)
6884 {
6885 pmap_paddr_t pa = ptoa(pn);
6886 assert(pn != vm_page_fictitious_addr);
6887 return pa_test_bits(pa, bits);
6888 }
6889
6890
6891 /*
6892 * Set the modify/reference bits on the specified physical page.
6893 */
6894 void
6895 pmap_set_modify(ppnum_t pn)
6896 {
6897 phys_attribute_set(pn, PP_ATTR_MODIFIED);
6898 }
6899
6900
6901 /*
6902 * Clear the modify bits on the specified physical page.
6903 */
6904 void
6905 pmap_clear_modify(
6906 ppnum_t pn)
6907 {
6908 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
6909 }
6910
6911
6912 /*
6913 * pmap_is_modified:
6914 *
6915 * Return whether or not the specified physical page is modified
6916 * by any physical maps.
6917 */
6918 boolean_t
6919 pmap_is_modified(
6920 ppnum_t pn)
6921 {
6922 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
6923 }
6924
6925
6926 /*
6927 * Set the reference bit on the specified physical page.
6928 */
6929 static void
6930 pmap_set_reference(
6931 ppnum_t pn)
6932 {
6933 phys_attribute_set(pn, PP_ATTR_REFERENCED);
6934 }
6935
6936 /*
6937 * Clear the reference bits on the specified physical page.
6938 */
6939 void
6940 pmap_clear_reference(
6941 ppnum_t pn)
6942 {
6943 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
6944 }
6945
6946
6947 /*
6948 * pmap_is_referenced:
6949 *
6950 * Return whether or not the specified physical page is referenced
6951 * by any physical maps.
6952 */
6953 boolean_t
6954 pmap_is_referenced(
6955 ppnum_t pn)
6956 {
6957 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
6958 }
6959
6960 /*
6961 * pmap_get_refmod(phys)
6962 * returns the referenced and modified bits of the specified
6963 * physical page.
6964 */
6965 unsigned int
6966 pmap_get_refmod(
6967 ppnum_t pn)
6968 {
6969 return (((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
6970 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0));
6971 }
6972
6973 /*
6974 * pmap_clear_refmod(phys, mask)
6975 * clears the referenced and modified bits as specified by the mask
6976 * of the specified physical page.
6977 */
6978 void
6979 pmap_clear_refmod_options(
6980 ppnum_t pn,
6981 unsigned int mask,
6982 unsigned int options,
6983 void *arg)
6984 {
6985 unsigned int bits;
6986
6987 bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
6988 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
6989 phys_attribute_clear(pn, bits, options, arg);
6990 }
6991
6992 void
6993 pmap_clear_refmod(
6994 ppnum_t pn,
6995 unsigned int mask)
6996 {
6997 pmap_clear_refmod_options(pn, mask, 0, NULL);
6998 }
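/*
 * Sketch of the ref/mod round trip for a hypothetical caller: the generic
 * VM_MEM_* bits returned by pmap_get_refmod() map one-to-one onto the
 * software-maintained PP_ATTR_* bits, and pmap_clear_refmod() translates
 * them back before clearing:
 *
 *	unsigned int refmod = pmap_get_refmod(pn);
 *	if (refmod & VM_MEM_MODIFIED)
 *		pmap_clear_refmod(pn, VM_MEM_MODIFIED);
 */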
6999
7000 unsigned int
7001 pmap_disconnect_options(
7002 ppnum_t pn,
7003 unsigned int options,
7004 void *arg)
7005 {
7006 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
7007 /*
7008 * On ARM, the "modified" bit is managed by software, so
7009 * we know up-front if the physical page is "modified",
7010 * without having to scan all the PTEs pointing to it.
7011 * The caller should have made the VM page "busy" so no one
7012 * should be able to establish any new mapping and "modify"
7013 * the page behind us.
7014 */
7015 if (pmap_is_modified(pn)) {
7016 /*
7017 * The page has been modified and will be sent to
7018 * the VM compressor.
7019 */
7020 options |= PMAP_OPTIONS_COMPRESSOR;
7021 } else {
7022 /*
7023 * The page hasn't been modified and will be freed
7024 * instead of compressed.
7025 */
7026 }
7027 }
7028
7029 /* disconnect the page */
7030 pmap_page_protect_options(pn, 0, options, arg);
7031
7032 /* return ref/chg status */
7033 return (pmap_get_refmod(pn));
7034 }
7035
7036 /*
7037 * Routine:
7038 * pmap_disconnect
7039 *
7040 * Function:
7041 * Disconnect all mappings for this page and return reference and change status
7042 * in generic format.
7043 *
7044 */
7045 unsigned int
7046 pmap_disconnect(
7047 ppnum_t pn)
7048 {
7049 pmap_page_protect(pn, 0); /* disconnect the page */
7050 return (pmap_get_refmod(pn)); /* return ref/chg status */
7051 }
7052
7053 boolean_t
7054 pmap_has_managed_page(ppnum_t first, ppnum_t last)
7055 {
7056 if (ptoa(first) >= vm_last_phys) return (FALSE);
7057 if (ptoa(last) < vm_first_phys) return (FALSE);
7058
7059 return (TRUE);
7060 }
7061
7062 /*
7063 * The state maintained by the noencrypt functions is used as a
7064 * debugging aid on ARM. This incurs some overhead on the part
7065 * of the caller. A special case check in phys_attribute_clear
7066 * (the most expensive path) currently minimizes this overhead,
7067 * but stubbing these functions out on RELEASE kernels yields
7068 * further wins.
7069 */
7070 boolean_t
7071 pmap_is_noencrypt(
7072 ppnum_t pn)
7073 {
7074 #if DEVELOPMENT || DEBUG
7075 boolean_t result = FALSE;
7076
7077 if (!pa_valid(ptoa(pn))) return FALSE;
7078
7079 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
7080
7081 return result;
7082 #else
7083 #pragma unused(pn)
7084 return FALSE;
7085 #endif
7086 }
7087
7088 void
7089 pmap_set_noencrypt(
7090 ppnum_t pn)
7091 {
7092 #if DEVELOPMENT || DEBUG
7093 if (!pa_valid(ptoa(pn))) return;
7094
7095 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
7096 #else
7097 #pragma unused(pn)
7098 #endif
7099 }
7100
7101 void
7102 pmap_clear_noencrypt(
7103 ppnum_t pn)
7104 {
7105 #if DEVELOPMENT || DEBUG
7106 if (!pa_valid(ptoa(pn))) return;
7107
7108 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
7109 #else
7110 #pragma unused(pn)
7111 #endif
7112 }
7113
7114
7115 void
7116 pmap_lock_phys_page(ppnum_t pn)
7117 {
7118 int pai;
7119 pmap_paddr_t phys = ptoa(pn);
7120
7121 if (pa_valid(phys)) {
7122 pai = (int)pa_index(phys);
7123 LOCK_PVH(pai);
7124 } else
7125 simple_lock(&phys_backup_lock);
7126 }
7127
7128
7129 void
7130 pmap_unlock_phys_page(ppnum_t pn)
7131 {
7132 int pai;
7133 pmap_paddr_t phys = ptoa(pn);
7134
7135 if (pa_valid(phys)) {
7136 pai = (int)pa_index(phys);
7137 UNLOCK_PVH(pai);
7138 } else
7139 simple_unlock(&phys_backup_lock);
7140 }
7141
7142 static void
7143 pmap_switch_user_ttb_internal(
7144 pmap_t pmap)
7145 {
7146 #if (__ARM_VMSA__ == 7)
7147 pmap_cpu_data_t *cpu_data_ptr;
7148
7149 cpu_data_ptr = pmap_get_cpu_data();
7150
7151 if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL)
7152 && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) {
7153 unsigned int c;
7154
7155 c = hw_atomic_sub((volatile uint32_t *)&cpu_data_ptr->cpu_user_pmap->cpu_ref, 1);
7156 if ((c == 0) && (cpu_data_ptr->cpu_user_pmap->prev_tte != 0)) {
7157 /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
7158 * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/
7159 tt_entry_t *tt_entry;
7160
7161 tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte;
7162 cpu_data_ptr->cpu_user_pmap->prev_tte = (tt_entry_t *) NULL;
7163 pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
7164 }
7165 }
7166 cpu_data_ptr->cpu_user_pmap = pmap;
7167 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
7168 (void) hw_atomic_add((volatile uint32_t *)&pmap->cpu_ref, 1);
7169
7170 #if MACH_ASSERT && __ARM_USER_PROTECT__
7171 {
7172 unsigned int ttbr0_val, ttbr1_val;
7173 __asm__ volatile("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
7174 __asm__ volatile("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
7175 if (ttbr0_val != ttbr1_val) {
7176 panic("Misaligned ttbr0 %08X\n", ttbr0_val);
7177 }
7178 }
7179 #endif
7180 if (pmap->tte_index_max == NTTES) {
7181 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
7182 __asm__ volatile("mcr p15,0,%0,c2,c0,2" : : "r"(2));
7183 __asm__ volatile("isb");
7184 #if !__ARM_USER_PROTECT__
7185 set_mmu_ttb(pmap->ttep);
7186 #endif
7187 } else {
7188 #if !__ARM_USER_PROTECT__
7189 set_mmu_ttb(pmap->ttep);
7190 #endif
7191 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
7192 __asm__ volatile("mcr p15,0,%0,c2,c0,2" : : "r"(1));
7193 __asm__ volatile("isb");
7194 #if MACH_ASSERT && __ARM_USER_PROTECT__
7195 if (pmap->ttep & 0x1000) {
7196 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
7197 }
7198 #endif
7199 }
7200
7201 #if !__ARM_USER_PROTECT__
7202 set_context_id(pmap->asid);
7203 #endif
7204 #else
7205
7206 pmap_get_cpu_data()->cpu_user_pmap = pmap;
7207 pmap_get_cpu_data()->cpu_user_pmap_stamp = pmap->stamp;
7208
7209 #if !__arm64__
7210 set_context_id(pmap->asid); /* Not required */
7211 #endif
7212 if (pmap == kernel_pmap) {
7213 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
7214 } else {
7215 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK)|(((uint64_t)pmap->asid) << TTBR_ASID_SHIFT));
7216 }
7217 #endif
7218 }
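/*
 * Worked example of the TTBR value composed above for the non-kernel case
 * (hypothetical numbers): with ttep == 0x812345000 and asid == 5, the value
 * programmed is
 *
 *	(0x812345000 & TTBR_BADDR_MASK) | ((uint64_t)5 << TTBR_ASID_SHIFT)
 *
 * so a single register write switches both the translation table base and
 * the current ASID.
 */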
7219
7220 void
7221 pmap_switch_user_ttb(
7222 pmap_t pmap)
7223 {
7224 pmap_switch_user_ttb_internal(pmap);
7225 }
7226
7227 /*
7228 * Try to "intuit" whether we need to raise a VM_PROT_WRITE fault
7229 * for the given address when a "swp" instruction raised the fault.
7230 * We have to look at the existing pte for the address to see
7231 * if it needs to get bumped, or just added. If just added, do it
7232 * as a read-only mapping first (this could result in extra faults -
7233 * but better that than extra copy-on-write evaluations).
7234 */
7235
7236 #if (__ARM_VMSA__ == 7)
7237 boolean_t
7238 arm_swap_readable_type(
7239 vm_map_address_t addr,
7240 unsigned int spsr)
7241 {
7242 int ap;
7243 pt_entry_t spte;
7244 pt_entry_t *ptep;
7245
7246 ptep = pmap_pte(current_pmap(), addr);
7247 if (ptep == PT_ENTRY_NULL)
7248 return (FALSE);
7249
7250 spte = *ptep;
7251 if (spte == ARM_PTE_TYPE_FAULT ||
7252 ARM_PTE_IS_COMPRESSED(spte))
7253 return (FALSE);
7254
7255 /* get the access permission bitmaps */
7256 /* (all subpages should be the same) */
7257 ap = (spte & ARM_PTE_APMASK);
7258
7259 if (spsr & 0xf) { /* Supervisor mode */
7260 panic("arm_swap_readable_type supv");
7261 return TRUE;
7262 } else { /* User mode */
7263 if ((ap == ARM_PTE_AP(AP_RWRW)) || (ap == ARM_PTE_AP(AP_RORO)))
7264 return (FALSE);
7265 else
7266 return (TRUE);
7267 }
7268 }
7269 #endif
7270
7271 /*
7272 * Routine: arm_force_fast_fault
7273 *
7274 * Function:
7275 * Force all mappings for this page to fault according
7276 * to the access modes allowed, so we can gather ref/modify
7277 * bits again.
7278 */
7279 static boolean_t
7280 arm_force_fast_fault_internal(
7281 ppnum_t ppnum,
7282 vm_prot_t allow_mode,
7283 int options)
7284 {
7285 pmap_paddr_t phys = ptoa(ppnum);
7286 pv_entry_t *pve_p;
7287 pt_entry_t *pte_p;
7288 int pai;
7289 boolean_t result;
7290 pv_entry_t **pv_h;
7291 boolean_t is_reusable, is_internal;
7292 boolean_t ref_fault;
7293 boolean_t mod_fault;
7294
7295 assert(ppnum != vm_page_fictitious_addr);
7296
7297 if (!pa_valid(phys)) {
7298 return FALSE; /* Not a managed page. */
7299 }
7300
7301 result = TRUE;
7302 ref_fault = FALSE;
7303 mod_fault = FALSE;
7304 pai = (int)pa_index(phys);
7305 LOCK_PVH(pai);
7306 pv_h = pai_to_pvh(pai);
7307
7308 pte_p = PT_ENTRY_NULL;
7309 pve_p = PV_ENTRY_NULL;
7310 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7311 pte_p = pvh_ptep(pv_h);
7312 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7313 pve_p = pvh_list(pv_h);
7314 }
7315
7316 is_reusable = IS_REUSABLE_PAGE(pai);
7317 is_internal = IS_INTERNAL_PAGE(pai);
7318
7319 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7320 vm_map_address_t va;
7321 pt_entry_t spte;
7322 pt_entry_t tmplate;
7323 pmap_t pmap;
7324 boolean_t update_pte;
7325
7326 if (pve_p != PV_ENTRY_NULL)
7327 pte_p = pve_get_ptep(pve_p);
7328
7329 if (pte_p == PT_ENTRY_NULL) {
7330 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
7331 }
7332 if (*pte_p == ARM_PTE_EMPTY) {
7333 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7334 }
7335 if (ARM_PTE_IS_COMPRESSED(*pte_p)) {
7336 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7337 }
7338
7339 pmap = ptep_get_pmap(pte_p);
7340 va = ptep_get_va(pte_p);
7341
7342 assert(va >= pmap->min && va < pmap->max);
7343
7344 if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
7345 result = FALSE;
7346 break;
7347 }
7348
7349 spte = *pte_p;
7350 tmplate = spte;
7351 update_pte = FALSE;
7352
7353 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
7354 /* read protection sets the pte to fault */
7355 tmplate = tmplate & ~ARM_PTE_AF;
7356 update_pte = TRUE;
7357 ref_fault = TRUE;
7358 }
7359 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
7360 /* take away write permission if set */
7361 if (pmap == kernel_pmap) {
7362 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
7363 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7364 }
7365 } else {
7366 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
7367 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
7368 }
7369 }
7370
7371 pte_set_ffr(tmplate, 1);
7372 update_pte = TRUE;
7373 mod_fault = TRUE;
7374 }
7375
7376
7377 if (update_pte) {
7378 if (*pte_p != ARM_PTE_TYPE_FAULT &&
7379 !ARM_PTE_IS_COMPRESSED(*pte_p)) {
7380 WRITE_PTE(pte_p, tmplate);
7381 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
7382 } else {
7383 WRITE_PTE(pte_p, tmplate);
7384 __asm__ volatile("isb");
7385 }
7386 }
7387
7388 /* update pmap stats and ledgers */
7389 if (IS_ALTACCT_PAGE(pai, pve_p)) {
7390 /*
7391 * We do not track "reusable" status for
7392 * "alternate accounting" mappings.
7393 */
7394 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
7395 is_reusable &&
7396 is_internal &&
7397 pmap != kernel_pmap) {
7398 /* one less "reusable" */
7399 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
7400 OSAddAtomic(-1, &pmap->stats.reusable);
7401 /* one more "internal" */
7402 OSAddAtomic(+1, &pmap->stats.internal);
7403 PMAP_STATS_PEAK(pmap->stats.internal);
7404 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
7405 pmap_ledger_credit(pmap,
7406 task_ledgers.internal,
7407 machine_ptob(1));
7408 assert(!IS_ALTACCT_PAGE(pai, pve_p));
7409 assert(IS_INTERNAL_PAGE(pai));
7410 pmap_ledger_credit(pmap,
7411 task_ledgers.phys_footprint,
7412 machine_ptob(1));
7413
7414 /*
7415 * Avoid the cost of another trap to handle the fast
7416 * fault when we next write to this page: let's just
7417 * handle that now since we already have all the
7418 * necessary information.
7419 */
7420 {
7421 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
7422 }
7423 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
7424 !is_reusable &&
7425 is_internal &&
7426 pmap != kernel_pmap) {
7427 /* one more "reusable" */
7428 OSAddAtomic(+1, &pmap->stats.reusable);
7429 PMAP_STATS_PEAK(pmap->stats.reusable);
7430 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
7431 /* one less "internal" */
7432 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
7433 OSAddAtomic(-1, &pmap->stats.internal);
7434 pmap_ledger_debit(pmap,
7435 task_ledgers.internal,
7436 machine_ptob(1));
7437 assert(!IS_ALTACCT_PAGE(pai, pve_p));
7438 assert(IS_INTERNAL_PAGE(pai));
7439 pmap_ledger_debit(pmap,
7440 task_ledgers.phys_footprint,
7441 machine_ptob(1));
7442 }
7443
7444 pte_p = PT_ENTRY_NULL;
7445 if (pve_p != PV_ENTRY_NULL)
7446 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7447 }
7448
7449 /* update global "reusable" status for this page */
7450 if (is_internal) {
7451 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
7452 is_reusable) {
7453 CLR_REUSABLE_PAGE(pai);
7454 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
7455 !is_reusable) {
7456 SET_REUSABLE_PAGE(pai);
7457 }
7458 }
7459
7460 if (mod_fault) {
7461 SET_MODFAULT_PAGE(pai);
7462 }
7463 if (ref_fault) {
7464 SET_REFFAULT_PAGE(pai);
7465 }
7466
7467 UNLOCK_PVH(pai);
7468 return result;
7469 }
7470
7471 boolean_t
7472 arm_force_fast_fault(
7473 ppnum_t ppnum,
7474 vm_prot_t allow_mode,
7475 int options,
7476 __unused void *arg)
7477 {
7478 pmap_paddr_t phys = ptoa(ppnum);
7479
7480 assert(ppnum != vm_page_fictitious_addr);
7481
7482 if (!pa_valid(phys)) {
7483 return FALSE; /* Not a managed page. */
7484 }
7485
7486 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
7487 }
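/*
 * Sketch of how this is used elsewhere in this file: to re-arm software
 * modify tracking for a page, write permission is revoked on all of its
 * mappings so the next store takes the fast-fault path:
 *
 *	arm_force_fast_fault(pn, VM_PROT_ALL & ~VM_PROT_WRITE, 0, NULL);
 *
 * The resulting write fault is handled by arm_fast_fault(), which calls
 * arm_clear_fast_fault() to restore write access and set PP_ATTR_MODIFIED.
 */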
7488
7489 /*
7490 * Routine: arm_clear_fast_fault
7491 *
7492 * Function:
7493 * Clear pending force fault for all mappings for this page based on
7494 * the observed fault type, update ref/modify bits.
7495 */
7496 boolean_t
7497 arm_clear_fast_fault(
7498 ppnum_t ppnum,
7499 vm_prot_t fault_type)
7500 {
7501 pmap_paddr_t pa = ptoa(ppnum);
7502 pv_entry_t *pve_p;
7503 pt_entry_t *pte_p;
7504 int pai;
7505 boolean_t result;
7506 pv_entry_t **pv_h;
7507
7508 assert(ppnum != vm_page_fictitious_addr);
7509
7510 if (!pa_valid(pa)) {
7511 return FALSE; /* Not a managed page. */
7512 }
7513
7514 result = FALSE;
7515 pai = (int)pa_index(pa);
7516 ASSERT_PVH_LOCKED(pai);
7517 pv_h = pai_to_pvh(pai);
7518
7519 pte_p = PT_ENTRY_NULL;
7520 pve_p = PV_ENTRY_NULL;
7521 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7522 pte_p = pvh_ptep(pv_h);
7523 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7524 pve_p = pvh_list(pv_h);
7525 }
7526
7527 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7528 vm_map_address_t va;
7529 pt_entry_t spte;
7530 pt_entry_t tmplate;
7531 pmap_t pmap;
7532
7533 if (pve_p != PV_ENTRY_NULL)
7534 pte_p = pve_get_ptep(pve_p);
7535
7536 if (pte_p == PT_ENTRY_NULL) {
7537 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
7538 }
7539 if (*pte_p == ARM_PTE_EMPTY) {
7540 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7541 }
7542
7543 pmap = ptep_get_pmap(pte_p);
7544 va = ptep_get_va(pte_p);
7545
7546 assert(va >= pmap->min && va < pmap->max);
7547
7548 spte = *pte_p;
7549 tmplate = spte;
7550
7551 if ((fault_type & VM_PROT_WRITE) && (pte_is_ffr(spte))) {
7552 {
7553 if (pmap == kernel_pmap)
7554 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
7555 else
7556 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWRW));
7557 }
7558
7559 tmplate |= ARM_PTE_AF;
7560
7561 pte_set_ffr(tmplate, 0);
7562 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
7563
7564 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
7565 tmplate = spte | ARM_PTE_AF;
7566
7567 {
7568 pa_set_bits(pa, PP_ATTR_REFERENCED);
7569 }
7570 }
7571
7572
7573 if (spte != tmplate) {
7574 if (spte != ARM_PTE_TYPE_FAULT) {
7575 WRITE_PTE(pte_p, tmplate);
7576 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
7577 } else {
7578 WRITE_PTE(pte_p, tmplate);
7579 __asm__ volatile("isb");
7580 }
7581 result = TRUE;
7582 }
7583
7584 pte_p = PT_ENTRY_NULL;
7585 if (pve_p != PV_ENTRY_NULL)
7586 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7587 }
7588 return result;
7589 }
7590
7591 /*
7592 * Determine if the fault was induced by software tracking of
7593 * modify/reference bits. If so, re-enable the mapping (and set
7594 * the appropriate bits).
7595 *
7596 * Returns KERN_SUCCESS if the fault was induced and was
7597 * successfully handled.
7598 *
7599 * Returns KERN_FAILURE if the fault was not induced and
7600 * the function was unable to deal with it.
7601 *
7602 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
7603 * disallows this type of access.
7604 */
7605 static kern_return_t
7606 arm_fast_fault_internal(
7607 pmap_t pmap,
7608 vm_map_address_t va,
7609 vm_prot_t fault_type,
7610 __unused boolean_t from_user)
7611 {
7612 kern_return_t result = KERN_FAILURE;
7613 pt_entry_t *ptep;
7614 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
7615 int pai;
7616 pmap_paddr_t pa;
7617
7618 PMAP_LOCK(pmap);
7619
7620 /*
7621 * If the entry doesn't exist, is completely invalid, or is already
7622 * valid, we can't fix it here.
7623 */
7624
7625 ptep = pmap_pte(pmap, va);
7626 if (ptep != PT_ENTRY_NULL) {
7627 spte = *ptep;
7628
7629 pa = pte_to_pa(spte);
7630
7631 if ((spte == ARM_PTE_TYPE_FAULT) ||
7632 ARM_PTE_IS_COMPRESSED(spte) ||
7633 (!pa_valid(pa))) {
7634 PMAP_UNLOCK(pmap);
7635 return result;
7636 }
7637
7638 pai = (int)pa_index(pa);
7639 LOCK_PVH(pai);
7640 } else {
7641 PMAP_UNLOCK(pmap);
7642 return result;
7643 }
7644
7645
7646 if ((IS_REFFAULT_PAGE(pai)) ||
7647 ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
7648 /*
7649 * An attempted access will always clear ref/mod fault state, as
7650 * appropriate for the fault type. arm_clear_fast_fault will
7651 * update the associated PTEs for the page as appropriate; if
7652 * any PTEs are updated, we redrive the access. If the mapping
7653 * does not actually allow for the attempted access, the
7654 * following fault will (hopefully) fail to update any PTEs, and
7655 * thus cause arm_fast_fault to decide that it failed to handle
7656 * the fault.
7657 */
7658 if (IS_REFFAULT_PAGE(pai)) {
7659 CLR_REFFAULT_PAGE(pai);
7660 }
7661 if ( (fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
7662 CLR_MODFAULT_PAGE(pai);
7663 }
7664
7665 if (arm_clear_fast_fault((ppnum_t)atop(pa),fault_type)) {
7666 /*
7667 * Should this preserve KERN_PROTECTION_FAILURE? The
7668 * cost of not doing so is another fault in a case
7669 * that should already result in an exception.
7670 */
7671 result = KERN_SUCCESS;
7672 }
7673 }
7674
7675 UNLOCK_PVH(pai);
7676 PMAP_UNLOCK(pmap);
7677 return result;
7678 }
7679
7680 kern_return_t
7681 arm_fast_fault(
7682 pmap_t pmap,
7683 vm_map_address_t va,
7684 vm_prot_t fault_type,
7685 __unused boolean_t from_user)
7686 {
7687 kern_return_t result = KERN_FAILURE;
7688
7689 if (va < pmap->min || va >= pmap->max)
7690 return result;
7691
7692 PMAP_TRACE(PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
7693 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
7694 from_user);
7695
7696 #if (__ARM_VMSA__ == 7)
7697 if (pmap != kernel_pmap) {
7698 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
7699 pmap_t cur_pmap;
7700 pmap_t cur_user_pmap;
7701
7702 cur_pmap = current_pmap();
7703 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
7704
7705 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
7706 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
7707 pmap_set_pmap(pmap, current_thread());
7708 result = KERN_SUCCESS;
7709 goto done;
7710 }
7711 }
7712 }
7713 #endif
7714
7715 result = arm_fast_fault_internal(pmap, va, fault_type, from_user);
7716
7717 #if (__ARM_VMSA__ == 7)
7718 done:
7719 #endif
7720
7721 PMAP_TRACE(PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
7722
7723 return result;
7724 }
7725
7726 void
7727 pmap_copy_page(
7728 ppnum_t psrc,
7729 ppnum_t pdst)
7730 {
7731 bcopy_phys((addr64_t) (ptoa(psrc)),
7732 (addr64_t) (ptoa(pdst)),
7733 PAGE_SIZE);
7734 }
7735
7736
7737 /*
7738 * pmap_copy_part_page copies part of the specified (machine independent) pages.
7739 */
7740 void
7741 pmap_copy_part_page(
7742 ppnum_t psrc,
7743 vm_offset_t src_offset,
7744 ppnum_t pdst,
7745 vm_offset_t dst_offset,
7746 vm_size_t len)
7747 {
7748 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
7749 (addr64_t) (ptoa(pdst) + dst_offset),
7750 len);
7751 }
7752
7753
7754 /*
7755 * pmap_zero_page zeros the specified (machine independent) page.
7756 */
7757 void
7758 pmap_zero_page(
7759 ppnum_t pn)
7760 {
7761 assert(pn != vm_page_fictitious_addr);
7762 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
7763 }
7764
7765 /*
7766 * pmap_zero_part_page
7767 * zeros the specified (machine independent) part of a page.
7768 */
7769 void
7770 pmap_zero_part_page(
7771 ppnum_t pn,
7772 vm_offset_t offset,
7773 vm_size_t len)
7774 {
7775 assert(pn != vm_page_fictitious_addr);
7776 assert(offset + len <= PAGE_SIZE);
7777 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
7778 }
7779
7780
7781 /*
7782 * nop in current arm implementation
7783 */
7784 void
7785 inval_copy_windows(
7786 __unused thread_t t)
7787 {
7788 }
7789
7790 void
7791 pmap_map_globals(
7792 void)
7793 {
7794 pt_entry_t *ptep, pte;
7795
7796 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
7797 assert(ptep != PT_ENTRY_NULL);
7798 assert(*ptep == ARM_PTE_EMPTY);
7799
7800 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
7801 #if __ARM_KERNEL_PROTECT__
7802 pte |= ARM_PTE_NG;
7803 #endif /* __ARM_KERNEL_PROTECT__ */
7804 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
7805 #if (__ARM_VMSA__ > 7)
7806 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
7807 #else
7808 pte |= ARM_PTE_SH;
7809 #endif
7810 *ptep = pte;
7811 FLUSH_PTE_RANGE(ptep,(ptep+1));
7812 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE);
7813 }
7814
7815 vm_offset_t
7816 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
7817 {
7818 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
7819 }
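/*
 * Worked example of the window layout above (hypothetical constants): with
 * CPUWINDOWS_MAX == 4 and PAGE_SIZE == 4096, window 1 on cpu 2 lands at
 *
 *	CPUWINDOWS_BASE + 4096 * (4 * 2 + 1) == CPUWINDOWS_BASE + 0x9000
 *
 * i.e. each cpu owns a contiguous block of CPUWINDOWS_MAX single-page
 * windows.
 */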
7820
7821 static unsigned int
7822 pmap_map_cpu_windows_copy_internal(
7823 ppnum_t pn,
7824 vm_prot_t prot,
7825 unsigned int wimg_bits)
7826 {
7827 pt_entry_t *ptep = NULL, pte;
7828 unsigned int cpu_num;
7829 unsigned int i;
7830 vm_offset_t cpu_copywindow_vaddr = 0;
7831
7832 cpu_num = pmap_get_cpu_data()->cpu_number;
7833
7834 for (i = 0; i<CPUWINDOWS_MAX; i++) {
7835 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
7836 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
7837 assert(!ARM_PTE_IS_COMPRESSED(*ptep));
7838 if (*ptep == ARM_PTE_TYPE_FAULT)
7839 break;
7840 }
7841 if (i == CPUWINDOWS_MAX) {
7842 panic("pmap_map_cpu_windows_copy: out of window\n");
7843 }
7844
7845 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
7846 #if __ARM_KERNEL_PROTECT__
7847 pte |= ARM_PTE_NG;
7848 #endif /* __ARM_KERNEL_PROTECT__ */
7849
7850 pte |= wimg_to_pte(wimg_bits);
7851
7852 if (prot & VM_PROT_WRITE) {
7853 pte |= ARM_PTE_AP(AP_RWNA);
7854 } else {
7855 pte |= ARM_PTE_AP(AP_RONA);
7856 }
7857
7858 WRITE_PTE(ptep, pte);
7859 /*
7860 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
7861 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
7862 */
7863 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE);
7864
7865 return(i);
7866 }
7867
7868 unsigned int
7869 pmap_map_cpu_windows_copy(
7870 ppnum_t pn,
7871 vm_prot_t prot,
7872 unsigned int wimg_bits)
7873 {
7874 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
7875 }
7876
7877 static void
7878 pmap_unmap_cpu_windows_copy_internal(
7879 unsigned int index)
7880 {
7881 pt_entry_t *ptep;
7882 unsigned int cpu_num;
7883 vm_offset_t cpu_copywindow_vaddr = 0;
7884
7885 cpu_num = pmap_get_cpu_data()->cpu_number;
7886
7887 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
7888 __asm__ volatile("dsb sy");
7889 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
7890 WRITE_PTE(ptep, ARM_PTE_TYPE_FAULT);
7891 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE);
7892 }
7893
7894 void
7895 pmap_unmap_cpu_windows_copy(
7896 unsigned int index)
7897 {
7898 return pmap_unmap_cpu_windows_copy_internal(index);
7899 }
7900
7901 /*
7902 * Mark a pmap as nested
7903 */
7904 static void
7905 pmap_set_nested_internal(
7906 pmap_t pmap)
7907 {
7908 pmap->nested = TRUE;
7909 }
7910
7911 void
7912 pmap_set_nested(
7913 pmap_t pmap)
7914 {
7915 pmap_set_nested_internal(pmap);
7916 }
7917
7918 /*
7919 * kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
7920 *
7921 * grand = the pmap that we will nest subord into
7922 * subord = the pmap that goes into the grand
7923 * vstart = start of range in pmap to be inserted
7924 * nstart = start of range in pmap nested pmap
7925 * size = Size of nest area (up to 16TB)
7926 *
7927 * Inserts a pmap into another. This is used to implement shared segments.
7928 *
7929 */
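
/*
 * Usage sketch (illustrative only; grand_pmap, shared_pmap, base and size are
 * hypothetical, and the block is not compiled).  The VM layer nests a shared
 * region pmap into a task pmap so that both translate that range through the
 * same page tables.
 */
#if 0
static void
pmap_nest_example(pmap_t grand_pmap, pmap_t shared_pmap,
		  addr64_t base, uint64_t size)
{
	kern_return_t kr;

	/* base and size must be aligned to the twig (L1/L2) granularity. */
	kr = pmap_nest(grand_pmap, shared_pmap, base, base, size);
	if (kr != KERN_SUCCESS) {
		panic("pmap_nest_example: pmap_nest failed (%d)", kr);
	}
}
#endif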
7930
7931 static kern_return_t
7932 pmap_nest_internal(
7933 pmap_t grand,
7934 pmap_t subord,
7935 addr64_t vstart,
7936 addr64_t nstart,
7937 uint64_t size)
7938 {
7939 kern_return_t kr = KERN_FAILURE;
7940 vm_map_offset_t vaddr, nvaddr;
7941 tt_entry_t *stte_p;
7942 tt_entry_t *gtte_p;
7943 unsigned int i;
7944 unsigned int num_tte;
7945 unsigned int nested_region_asid_bitmap_size;
7946 unsigned int* nested_region_asid_bitmap;
7947 int expand_options = 0;
7948
7949
7950 #if (__ARM_VMSA__ == 7)
7951 if (((size|vstart|nstart) & ARM_TT_L1_PT_OFFMASK) != 0x0ULL) {
7952 return KERN_INVALID_VALUE; /* Nested regions must be 4MB aligned */
7953 }
7954 #else
7955 if (((size|vstart|nstart) & (ARM_TT_L2_OFFMASK)) != 0x0ULL) {
7956 panic("pmap_nest() pmap %p has a nested pmap 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
7957 }
7958 #endif
7959
7960 if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
7961 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
7962 }
7963
7964 if (subord->nested_region_asid_bitmap == NULL) {
7965 nested_region_asid_bitmap_size = (unsigned int)(size>>ARM_TT_TWIG_SHIFT)/(sizeof(unsigned int)*NBBY);
7966
7967 nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size*sizeof(unsigned int));
7968 bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size*sizeof(unsigned int));
7969
7970 PMAP_LOCK(subord);
7971 if (subord->nested_region_asid_bitmap == NULL) {
7972 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
7973 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
7974 subord->nested_region_subord_addr = nstart;
7975 subord->nested_region_size = (mach_vm_offset_t) size;
7976 nested_region_asid_bitmap = NULL;
7977 }
7978 PMAP_UNLOCK(subord);
7979 if (nested_region_asid_bitmap != NULL) {
7980 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size*sizeof(unsigned int));
7981 }
7982 }
7983 if ((subord->nested_region_subord_addr + subord->nested_region_size) < (nstart+size)) {
7984 uint64_t new_size;
7985 unsigned int new_nested_region_asid_bitmap_size;
7986 unsigned int* new_nested_region_asid_bitmap;
7987
7988 nested_region_asid_bitmap = NULL;
7989 nested_region_asid_bitmap_size = 0;
7990 new_size = nstart + size - subord->nested_region_subord_addr;
7991
7992 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
7993 new_nested_region_asid_bitmap_size = (unsigned int)((new_size>>ARM_TT_TWIG_SHIFT)/(sizeof(unsigned int)*NBBY)) + 1;
7994
7995 new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size*sizeof(unsigned int));
7996 PMAP_LOCK(subord);
7997 if (subord->nested_region_size < new_size) {
7998 bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size*sizeof(unsigned int));
7999 bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
8000 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
8001 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
8002 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
8003 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
8004 subord->nested_region_size = new_size;
8005 new_nested_region_asid_bitmap = NULL;
8006 }
8007 PMAP_UNLOCK(subord);
8008 if (nested_region_asid_bitmap != NULL)
8009 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size*sizeof(unsigned int));
8010 if (new_nested_region_asid_bitmap != NULL)
8011 kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size*sizeof(unsigned int));
8012 }
8013
8014 PMAP_LOCK(subord);
8015 if (grand->nested_pmap == PMAP_NULL) {
8016 grand->nested_pmap = subord;
8017 grand->nested_region_grand_addr = vstart;
8018 grand->nested_region_subord_addr = nstart;
8019 grand->nested_region_size = (mach_vm_offset_t) size;
8020 } else {
8021 if ((grand->nested_region_grand_addr > vstart)) {
8022 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
8023 }
8024 else if ((grand->nested_region_grand_addr + grand->nested_region_size) < (vstart+size)) {
8025 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
8026 }
8027 }
8028
8029 #if (__ARM_VMSA__ == 7)
8030 nvaddr = (vm_map_offset_t) nstart;
8031 vaddr = (vm_map_offset_t) vstart;
8032 num_tte = size >> ARM_TT_L1_SHIFT;
8033
8034 for (i = 0; i < num_tte; i++) {
8035 stte_p = pmap_tte(subord, nvaddr);
8036 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
8037 PMAP_UNLOCK(subord);
8038 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);
8039
8040 if (kr != KERN_SUCCESS) {
8041 PMAP_LOCK(grand);
8042 goto done;
8043 }
8044
8045 PMAP_LOCK(subord);
8046 }
8047 PMAP_UNLOCK(subord);
8048 PMAP_LOCK(grand);
8049 stte_p = pmap_tte(grand, vaddr);
8050 if (stte_p == (tt_entry_t *)NULL) {
8051 PMAP_UNLOCK(grand);
8052 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
8053
8054 if (kr != KERN_SUCCESS) {
8055 PMAP_LOCK(grand);
8056 goto done;
8057 }
8058 } else {
8059 PMAP_UNLOCK(grand);
8060 kr = KERN_SUCCESS;
8061 }
8062 PMAP_LOCK(subord);
8063
8064
8065 nvaddr += ARM_TT_L1_SIZE;
8066 vaddr += ARM_TT_L1_SIZE;
8067 }
8068
8069 #else
8070 nvaddr = (vm_map_offset_t) nstart;
8071 num_tte = (unsigned int)(size >> ARM_TT_L2_SHIFT);
8072
8073 for (i = 0; i < num_tte; i++) {
8074 stte_p = pmap_tt2e(subord, nvaddr);
8075 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
8076 PMAP_UNLOCK(subord);
8077 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L3_LEVEL);
8078
8079 if (kr != KERN_SUCCESS) {
8080 PMAP_LOCK(grand);
8081 goto done;
8082 }
8083
8084 PMAP_LOCK(subord);
8085 }
8086 nvaddr += ARM_TT_L2_SIZE;
8087 }
8088 #endif
8089 PMAP_UNLOCK(subord);
8090
8091 /*
8092 * Copy TTEs from the subord pmap into the grand pmap.
8093 */
8094
8095 PMAP_LOCK(grand);
8096 nvaddr = (vm_map_offset_t) nstart;
8097 vaddr = (vm_map_offset_t) vstart;
8098
8099
8100 #if (__ARM_VMSA__ == 7)
8101 for (i = 0; i < num_tte; i++) {
8102
8103 stte_p = pmap_tte(subord, nvaddr);
8104 gtte_p = pmap_tte(grand, vaddr);
8105 *gtte_p = *stte_p;
8106
8107 nvaddr += ARM_TT_L1_SIZE;
8108 vaddr += ARM_TT_L1_SIZE;
8109 }
8110 #else
8111 for (i = 0; i < num_tte; i++) {
8112
8113 stte_p = pmap_tt2e(subord, nstart);
8114 gtte_p = pmap_tt2e(grand, vaddr);
8115 if (gtte_p == PT_ENTRY_NULL) {
8116 PMAP_UNLOCK(grand);
8117 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L2_LEVEL);
8118 PMAP_LOCK(grand);
8119
8120 if (kr != KERN_SUCCESS) {
8121 goto done;
8122 }
8123
8124 gtte_p = pmap_tt2e(grand, vaddr);
8125 }
8126 *gtte_p = *stte_p;
8127 vaddr += ARM_TT_L2_SIZE;
8128 nstart += ARM_TT_L2_SIZE;
8129 }
8130 #endif
8131
8132 kr = KERN_SUCCESS;
8133 done:
8134
8135 #ifndef __ARM_L1_PTW__
8136 CleanPoU_DcacheRegion((vm_offset_t) pmap_tte(grand, vstart), num_tte * sizeof(tt_entry_t));
8137 #endif
8138
8139 #if (__ARM_VMSA__ > 7)
8140 /*
8141 * check for overflow on LP64 arch
8142 */
8143 assert((size & 0xFFFFFFFF00000000ULL) == 0);
8144 #endif
8145 PMAP_UPDATE_TLBS(grand, vstart, vstart + size);
8146
8147 PMAP_UNLOCK(grand);
8148 return kr;
8149 }
8150
8151 kern_return_t pmap_nest(
8152 pmap_t grand,
8153 pmap_t subord,
8154 addr64_t vstart,
8155 addr64_t nstart,
8156 uint64_t size)
8157 {
8158 kern_return_t kr = KERN_FAILURE;
8159
8160 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
8161 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
8162 VM_KERNEL_ADDRHIDE(vstart));
8163
8164 kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
8165
8166 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
8167
8168 return kr;
8169 }
8170
8171 /*
8172 * kern_return_t pmap_unnest(grand, vaddr, size)
8173 *
8174 * grand = the pmap from which the nested region is removed
8175 * vaddr = start of the range in grand to be unnested
8176 * size = size of the range to be unnested; a usage sketch follows this comment.
8177 *
8178 */
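
/*
 * Usage sketch (illustrative only; grand_pmap, base and size are hypothetical,
 * and the block is not compiled).  Unnesting marks the affected leaf PTEs
 * non-global in the nested pmap and then clears the corresponding TTEs in
 * grand.
 */
#if 0
static void
pmap_unnest_example(pmap_t grand_pmap, addr64_t base, uint64_t size)
{
	kern_return_t kr;

	/* base and size must be twig-aligned, as with pmap_nest(). */
	kr = pmap_unnest(grand_pmap, base, size);
	assert(kr == KERN_SUCCESS);
}
#endif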
8179
8180 kern_return_t
8181 pmap_unnest(
8182 pmap_t grand,
8183 addr64_t vaddr,
8184 uint64_t size)
8185 {
8186 return(pmap_unnest_options(grand, vaddr, size, 0));
8187 }
8188
8189 static kern_return_t
8190 pmap_unnest_options_internal(
8191 pmap_t grand,
8192 addr64_t vaddr,
8193 uint64_t size,
8194 unsigned int option)
8195 {
8196 vm_map_offset_t start;
8197 vm_map_offset_t addr;
8198 tt_entry_t *tte_p;
8199 unsigned int current_index;
8200 unsigned int start_index;
8201 unsigned int max_index;
8202 unsigned int num_tte;
8203 unsigned int i;
8204
8205 #if (__ARM_VMSA__ == 7)
8206 if (((size|vaddr) & ARM_TT_L1_PT_OFFMASK) != 0x0ULL) {
8207 panic("pmap_unnest(): unaligned request\n");
8208 }
8209 #else
8210 if (((size|vaddr) & ARM_TT_L2_OFFMASK) != 0x0ULL) {
8211 panic("pmap_unnest(): unaligned request\n");
8212 }
8213 #endif
8214
8215 if ((option & PMAP_UNNEST_CLEAN) == 0)
8216 {
8217 PMAP_LOCK(grand->nested_pmap);
8218
8219 start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr ;
8220 start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> ARM_TT_TWIG_SHIFT);
8221 max_index = (unsigned int)(start_index + (size >> ARM_TT_TWIG_SHIFT));
8222 num_tte = (unsigned int)(size >> ARM_TT_TWIG_SHIFT);
8223
8224 if (size > grand->nested_region_size) {
8225 panic("pmap_unnest() pmap %p %llu, %llu\n", grand, size, (uint64_t)grand->nested_region_size);
8226 }
8227
8228 for (current_index = start_index, addr = start; current_index < max_index; current_index++) {
8229 pt_entry_t *bpte, *epte, *cpte;
8230
8231
8232 if(!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
8233
8234 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
8235 bpte = pmap_pte(grand->nested_pmap, addr);
8236 epte = bpte + (ARM_TT_LEAF_INDEX_MASK>>ARM_TT_LEAF_SHIFT);
8237
8238 for (cpte = bpte; cpte <= epte; cpte++) {
8239 pmap_paddr_t pa;
8240 int pai=0;
8241 boolean_t managed=FALSE;
8242 pt_entry_t spte;
8243
8244 if ((*cpte != ARM_PTE_TYPE_FAULT)
8245 && (!ARM_PTE_IS_COMPRESSED(*cpte))) {
8246
8247 spte = *cpte;
8248 while (!managed) {
8249 pa = pte_to_pa(spte);
8250 if (!pa_valid(pa))
8251 break;
8252 pai = (int)pa_index(pa);
8253 LOCK_PVH(pai);
8254 spte = *cpte;
8255 pa = pte_to_pa(spte);
8256 if (pai == (int)pa_index(pa)) {
8257 managed =TRUE;
8258 break; // Leave the PVH locked as we'll unlock it after we update the PTE
8259 }
8260 UNLOCK_PVH(pai);
8261 }
8262
8263 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
8264
8265 WRITE_PTE(cpte, (spte | ARM_PTE_NG));
8266 }
8267
8268 if (managed)
8269 {
8270 ASSERT_PVH_LOCKED(pai);
8271 UNLOCK_PVH(pai);
8272 }
8273 }
8274 }
8275 }
8276
8277 addr += ARM_TT_TWIG_SIZE;
8278
8279 #ifndef __ARM_L1_PTW__
8280 CleanPoU_DcacheRegion((vm_offset_t) pmap_pte(grand->nested_pmap, start), num_tte * sizeof(tt_entry_t));
8281 #endif
8282 PMAP_UPDATE_TLBS(grand->nested_pmap, start, start + size);
8283 }
8284
8285 PMAP_UNLOCK(grand->nested_pmap);
8286 }
8287
8288 PMAP_LOCK(grand);
8289
8290 /*
8291 * Invalidate all TTEs for the segment at vaddr in pmap grand.
8292 */
8293 start = vaddr;
8294 addr = vaddr;
8295
8296 num_tte = (unsigned int)(size >> ARM_TT_TWIG_SHIFT);
8297
8298 for (i = 0; i < num_tte; i++) {
8299 tte_p = pmap_tte(grand, addr);
8300 *tte_p = ARM_TTE_TYPE_FAULT;
8301
8302 addr += ARM_TT_TWIG_SIZE;
8303 }
8304
8305 #ifndef __ARM_L1_PTW__
8306 CleanPoU_DcacheRegion((vm_offset_t) pmap_tte(grand, start), num_tte * sizeof(tt_entry_t));
8307 #endif
8308 PMAP_UPDATE_TLBS(grand, start, start + size);
8309
8310 PMAP_UNLOCK(grand);
8311
8312 return KERN_SUCCESS;
8313 }
8314
8315 kern_return_t
8316 pmap_unnest_options(
8317 pmap_t grand,
8318 addr64_t vaddr,
8319 uint64_t size,
8320 unsigned int option)
8321 {
8322 kern_return_t kr = KERN_FAILURE;
8323
8324 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
8325 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
8326
8327 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
8328
8329 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
8330
8331 return kr;
8332 }
8333
8334 boolean_t
8335 pmap_adjust_unnest_parameters(
8336 __unused pmap_t p,
8337 __unused vm_map_offset_t *s,
8338 __unused vm_map_offset_t *e)
8339 {
8340 return TRUE; /* to get to log_unnest_badness()... */
8341 }
8342
8343 /*
8344 * disable no-execute capability on
8345 * the specified pmap
8346 */
8347 #if DEVELOPMENT || DEBUG
8348 void
8349 pmap_disable_NX(
8350 pmap_t pmap)
8351 {
8352 pmap->nx_enabled = FALSE;
8353 }
8354 #else
8355 void
8356 pmap_disable_NX(
8357 __unused pmap_t pmap)
8358 {
8359 }
8360 #endif
8361
8362 void
8363 pt_fake_zone_init(
8364 int zone_index)
8365 {
8366 pt_fake_zone_index = zone_index;
8367 }
8368
8369 void
8370 pt_fake_zone_info(
8371 int *count,
8372 vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
8373 uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
8374 {
8375 *count = inuse_pmap_pages_count;
8376 *cur_size = PAGE_SIZE * (inuse_pmap_pages_count);
8377 *max_size = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
8378 *elem_size = PAGE_SIZE;
8379 *alloc_size = PAGE_SIZE;
8380 *sum_size = (alloc_pmap_pages_count) * PAGE_SIZE;
8381
8382 *collectable = 1;
8383 *exhaustable = 0;
8384 *caller_acct = 1;
8385 }
8386
8387 /*
8388 * flush a range of hardware TLB entries.
8389 * NOTE: assumes the smallest TLB entry in use will be for
8390 * an ARM small page (4K).
8391 */
8392
8393 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
8394 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
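
/*
 * Worked example (illustrative only; kva is a hypothetical kernel virtual
 * address, and the block is not compiled).  flush_mmu_tlb_region_asid() below
 * flushes entry by entry for small ranges and falls back to an ASID or full
 * TLB flush once the range covers more pages than the thresholds above.
 */
#if 0
static void
flush_mmu_tlb_region_example(vm_offset_t kva)
{
	/* 16 pages: below both thresholds, so each entry is flushed individually. */
	flush_mmu_tlb_region(kva, 16 * ARM_PGBYTES);

	/* 512 pages: exceeds ARM64_FULL_TLB_FLUSH_THRESHOLD (256) as well as
	 * ARM_FULL_TLB_FLUSH_THRESHOLD (64), so the ASID (or the whole TLB for
	 * the kernel/nested case) is flushed instead. */
	flush_mmu_tlb_region(kva, 512 * ARM_PGBYTES);
}
#endif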
8395
8396 static void
8397 flush_mmu_tlb_region_asid(
8398 vm_offset_t va,
8399 unsigned length,
8400 pmap_t pmap)
8401 {
8402 #if (__ARM_VMSA__ == 7)
8403 vm_offset_t end = va + length;
8404 uint32_t asid;
8405
8406 asid = pmap->asid;
8407
8408 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
8409 boolean_t flush_all = FALSE;
8410
8411 if ((asid == 0) || (pmap->nested == TRUE))
8412 flush_all = TRUE;
8413 if (flush_all)
8414 flush_mmu_tlb();
8415 else
8416 flush_mmu_tlb_asid(asid);
8417
8418 return;
8419 }
8420 if (pmap->nested == TRUE) {
8421 #if !__ARM_MP_EXT__
8422 flush_mmu_tlb();
8423 #else
8424 va = arm_trunc_page(va);
8425 while (va < end) {
8426 flush_mmu_tlb_mva_entries(va);
8427 va += ARM_SMALL_PAGE_SIZE;
8428 }
8429 #endif
8430 return;
8431 }
8432 va = arm_trunc_page(va) | (asid & 0xff);
8433 flush_mmu_tlb_entries(va, end);
8434
8435 #else
8436 vm_offset_t end = va + length;
8437 uint32_t asid;
8438
8439 asid = pmap->asid;
8440
8441 if ((length >> ARM_TT_L3_SHIFT) > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
8442 boolean_t flush_all = FALSE;
8443
8444 if ((asid == 0) || (pmap->nested == TRUE))
8445 flush_all = TRUE;
8446 if (flush_all)
8447 flush_mmu_tlb();
8448 else
8449 flush_mmu_tlb_asid((uint64_t)asid << TLBI_ASID_SHIFT);
8450 return;
8451 }
8452 va = tlbi_asid(asid) | tlbi_addr(va);
8453 end = tlbi_asid(asid) | tlbi_addr(end);
8454 if (pmap->nested == TRUE) {
8455 flush_mmu_tlb_allentries(va, end);
8456 } else {
8457 flush_mmu_tlb_entries(va, end);
8458 }
8459
8460 #endif
8461 }
8462
8463 void
8464 flush_mmu_tlb_region(
8465 vm_offset_t va,
8466 unsigned length)
8467 {
8468 flush_mmu_tlb_region_asid(va, length, kernel_pmap);
8469 }
8470
8471 unsigned int
8472 pmap_cache_attributes(
8473 ppnum_t pn)
8474 {
8475 pmap_paddr_t paddr;
8476 int pai;
8477 unsigned int result;
8478 pp_attr_t pp_attr_current;
8479
8480 paddr = ptoa(pn);
8481
8482 if ((paddr >= io_rgn_start) && (paddr < io_rgn_end)) {
8483 unsigned int attr = IO_ATTR_WIMG(io_attr_table[(paddr - io_rgn_start) / io_rgn_granule]);
8484 if (attr)
8485 return attr;
8486 else
8487 return (VM_WIMG_IO);
8488 }
8489
8490
8491 if (!pmap_initialized) {
8492 if ((paddr >= gPhysBase) && (paddr < gPhysBase+gPhysSize))
8493 return (VM_WIMG_DEFAULT);
8494 else
8495 return (VM_WIMG_IO);
8496 }
8497
8498
8499 if (!pa_valid(paddr))
8500 return (VM_WIMG_IO);
8501
8502 result = VM_WIMG_DEFAULT;
8503
8504 pai = (int)pa_index(paddr);
8505
8506 pp_attr_current = pp_attr_table[pai];
8507 if (pp_attr_current & PP_ATTR_WIMG_MASK)
8508 result = pp_attr_current & PP_ATTR_WIMG_MASK;
8509 return result;
8510 }
8511
8512 static boolean_t
8513 pmap_batch_set_cache_attributes_internal(
8514 ppnum_t pn,
8515 unsigned int cacheattr,
8516 unsigned int page_cnt,
8517 unsigned int page_index,
8518 boolean_t doit,
8519 unsigned int *res)
8520 {
8521 pmap_paddr_t paddr;
8522 int pai;
8523 pp_attr_t pp_attr_current;
8524 pp_attr_t pp_attr_template;
8525 unsigned int wimg_bits_prev, wimg_bits_new;
8526
8527 if (cacheattr & VM_WIMG_USE_DEFAULT)
8528 cacheattr = VM_WIMG_DEFAULT;
8529
8530 if ((doit == FALSE) && (*res == 0)) {
8531 *res = page_cnt;
8532 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt<<PAGE_SHIFT) == FALSE) {
8533 return FALSE;
8534 }
8535 }
8536
8537 paddr = ptoa(pn);
8538
8539 if (!pa_valid(paddr)) {
8540 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed\n", pn);
8541 }
8542
8543 pai = (int)pa_index(paddr);
8544
8545 if (doit)
8546 LOCK_PVH(pai);
8547
8548 pp_attr_current = pp_attr_table[pai];
8549 wimg_bits_prev = VM_WIMG_DEFAULT;
8550 if (pp_attr_current & PP_ATTR_WIMG_MASK)
8551 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
8552
8553 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
8554
8555 if (doit)
8556 pp_attr_table[pai] = pp_attr_template;
8557
8558 wimg_bits_new = VM_WIMG_DEFAULT;
8559 if (pp_attr_template & PP_ATTR_WIMG_MASK)
8560 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
8561
8562 if (doit) {
8563 if (wimg_bits_new != wimg_bits_prev)
8564 pmap_update_cache_attributes_locked(pn, cacheattr);
8565 UNLOCK_PVH(pai);
8566 } else {
8567 if (wimg_bits_new == VM_WIMG_COPYBACK) {
8568 return FALSE;
8569 }
8570 if (wimg_bits_prev == wimg_bits_new) {
8571 *res = *res-1;
8572 if (!platform_cache_batch_wimg(wimg_bits_new, (*res)<<PAGE_SHIFT)) {
8573 return FALSE;
8574 }
8575 }
8576 return TRUE;
8577 }
8578
8579 if (page_cnt == (page_index+1)) {
8580 wimg_bits_prev = VM_WIMG_COPYBACK;
8581 if (((page_cnt == (page_index+1)) && (wimg_bits_prev != wimg_bits_new))
8582 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
8583 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
8584 && (wimg_bits_new != VM_WIMG_COPYBACK))
8585 || ((wimg_bits_prev == VM_WIMG_WTHRU)
8586 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
8587 platform_cache_flush_wimg(wimg_bits_new);
8588 }
8589 }
8590
8591 return TRUE;
8592 }
8593
8594 boolean_t
8595 pmap_batch_set_cache_attributes(
8596 ppnum_t pn,
8597 unsigned int cacheattr,
8598 unsigned int page_cnt,
8599 unsigned int page_index,
8600 boolean_t doit,
8601 unsigned int *res)
8602 {
8603 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
8604 }
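
/*
 * Usage sketch (illustrative only; pages and page_cnt are hypothetical, and the
 * block is not compiled).  The batch interface is driven in two passes: a dry
 * run with doit == FALSE, whose result says whether the platform can batch the
 * required cache maintenance for this WIMG change, followed by a pass with
 * doit == TRUE that commits the new attribute for every page.
 */
#if 0
static void
pmap_batch_set_cache_attributes_example(ppnum_t *pages, unsigned int page_cnt,
					unsigned int cacheattr)
{
	unsigned int res = 0;
	unsigned int i;
	boolean_t batched = TRUE;

	/* Pass 1: dry run; the first call initializes res to page_cnt. */
	for (i = 0; i < page_cnt; i++) {
		if (!pmap_batch_set_cache_attributes(pages[i], cacheattr,
		    page_cnt, i, FALSE, &res)) {
			/* Batched maintenance not possible; a real caller
			 * would fall back to per-page cache handling. */
			batched = FALSE;
			break;
		}
	}

	/* Pass 2: commit the attribute change for each page. */
	for (i = 0; i < page_cnt; i++) {
		(void) pmap_batch_set_cache_attributes(pages[i], cacheattr,
		    page_cnt, i, TRUE, &res);
	}

	(void) batched;
}
#endif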
8605
8606 static void
8607 pmap_set_cache_attributes_internal(
8608 ppnum_t pn,
8609 unsigned int cacheattr)
8610 {
8611 pmap_paddr_t paddr;
8612 int pai;
8613 pp_attr_t pp_attr_current;
8614 pp_attr_t pp_attr_template;
8615 unsigned int wimg_bits_prev, wimg_bits_new;
8616
8617 paddr = ptoa(pn);
8618
8619 if (!pa_valid(paddr)) {
8620 return; /* Not a managed page. */
8621 }
8622
8623 if (cacheattr & VM_WIMG_USE_DEFAULT)
8624 cacheattr = VM_WIMG_DEFAULT;
8625
8626 pai = (int)pa_index(paddr);
8627
8628 LOCK_PVH(pai);
8629
8630 pp_attr_current = pp_attr_table[pai];
8631 wimg_bits_prev = VM_WIMG_DEFAULT;
8632 if (pp_attr_current & PP_ATTR_WIMG_MASK)
8633 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
8634
8635 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK)) ;
8636
8637 pp_attr_table[pai] = pp_attr_template;
8638 wimg_bits_new = VM_WIMG_DEFAULT;
8639 if (pp_attr_template & PP_ATTR_WIMG_MASK)
8640 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
8641
8642 if (wimg_bits_new != wimg_bits_prev)
8643 pmap_update_cache_attributes_locked(pn, cacheattr);
8644
8645 UNLOCK_PVH(pai);
8646
8647 if ((wimg_bits_prev != wimg_bits_new)
8648 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
8649 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
8650 && (wimg_bits_new != VM_WIMG_COPYBACK))
8651 || ((wimg_bits_prev == VM_WIMG_WTHRU)
8652 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK)))))
8653 pmap_sync_page_attributes_phys(pn);
8654
8655 }
8656
8657 void
8658 pmap_set_cache_attributes(
8659 ppnum_t pn,
8660 unsigned int cacheattr)
8661 {
8662 pmap_set_cache_attributes_internal(pn, cacheattr);
8663 }
8664
8665 void
8666 pmap_update_cache_attributes_locked(
8667 ppnum_t ppnum,
8668 unsigned attributes)
8669 {
8670 pmap_paddr_t phys = ptoa(ppnum);
8671 pv_entry_t *pve_p;
8672 pt_entry_t *pte_p;
8673 pv_entry_t **pv_h;
8674 pt_entry_t tmplate;
8675 unsigned int pai;
8676
8677 #if (__ARM_VMSA__ == 7)
8678 #define ARM_PTE_SHMASK ARM_PTE_SH
8679 #endif
8680
8681 #if __ARM_PTE_PHYSMAP__
8682 vm_offset_t kva = phystokv(phys);
8683 pte_p = pmap_pte(kernel_pmap, kva);
8684
8685 tmplate = *pte_p;
8686 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
8687 tmplate |= wimg_to_pte(attributes);
8688
8689 WRITE_PTE(pte_p, tmplate);
8690 PMAP_UPDATE_TLBS(kernel_pmap, kva, kva + PAGE_SIZE);
8691 #endif
8692
8693 pai = (unsigned int)pa_index(phys);
8694
8695 pv_h = pai_to_pvh(pai);
8696
8697 pte_p = PT_ENTRY_NULL;
8698 pve_p = PV_ENTRY_NULL;
8699 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
8700 pte_p = pvh_ptep(pv_h);
8701 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
8702 pve_p = pvh_list(pv_h);
8703 pte_p = PT_ENTRY_NULL;
8704 }
8705
8706 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
8707 vm_map_address_t va;
8708 pmap_t pmap;
8709
8710 if (pve_p != PV_ENTRY_NULL)
8711 pte_p = pve_get_ptep(pve_p);
8712
8713 pmap = ptep_get_pmap(pte_p);
8714 va = ptep_get_va(pte_p);
8715
8716 tmplate = *pte_p;
8717 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
8718 tmplate |= wimg_to_pte(attributes);
8719
8720 WRITE_PTE(pte_p, tmplate);
8721 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
8722
8723 pte_p = PT_ENTRY_NULL;
8724 if (pve_p != PV_ENTRY_NULL)
8725 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
8726
8727 }
8728 }
8729
8730 #if (__ARM_VMSA__ == 7)
8731 vm_map_address_t
8732 pmap_create_sharedpage(
8733 void)
8734 {
8735 pmap_paddr_t pa;
8736 kern_return_t kr;
8737
8738 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
8739 memset((char *) phystokv(pa), 0, PAGE_SIZE);
8740
8741 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
8742 assert(kr == KERN_SUCCESS);
8743
8744 return((vm_map_address_t)phystokv(pa));
8745
8746 }
8747 #else
8748 static void
8749 pmap_update_tt3e(
8750 pmap_t pmap,
8751 vm_address_t address,
8752 tt_entry_t template)
8753 {
8754 tt_entry_t *ptep, pte;
8755
8756 ptep = pmap_tt3e(pmap, address);
8757 if (ptep == NULL) {
8758 panic("%s: no ptep?\n", __FUNCTION__);
8759 }
8760
8761 pte = *ptep;
8762 pte = tte_to_pa(pte) | template;
8763 WRITE_PTE(ptep, pte);
8764 }
8765
8766 /* Note absence of non-global bit */
8767 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
8768 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
8769 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
8770 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
8771
8772 vm_map_address_t
8773 pmap_create_sharedpage(
8774 void
8775 )
8776 {
8777 kern_return_t kr;
8778 pmap_paddr_t pa = 0;
8779
8780
8781 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
8782
8783 memset((char *) phystokv(pa), 0, PAGE_SIZE);
8784
8785 /*
8786 * The kernel pmap maintains a user accessible mapping of the commpage
8787 * to test PAN.
8788 */
8789 kr = pmap_expand(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, 0, PMAP_TT_L3_LEVEL);
8790 assert(kr == KERN_SUCCESS);
8791 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
8792 assert(kr == KERN_SUCCESS);
8793
8794 /*
8795 * This mapping should not be global (as we only expect to reference it
8796 * during testing).
8797 */
8798 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
8799
8800 /*
8801 * With PAN enabled, kernel drivers can no longer use the previous mapping, which is user readable.
8802 * They should use the following mapping instead.
8803 */
8804 kr = pmap_expand(kernel_pmap, _COMM_PRIV_PAGE64_BASE_ADDRESS, 0, PMAP_TT_L3_LEVEL);
8805 assert(kr == KERN_SUCCESS);
8806 kr = pmap_enter(kernel_pmap, _COMM_PRIV_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
8807 assert(kr == KERN_SUCCESS);
8808
8809 /*
8810 * In order to avoid burning extra pages on mapping the shared page, we
8811 * create a dedicated pmap for the shared page. We forcibly nest the
8812 * translation tables from this pmap into other pmaps. The level we
8813 * will nest at depends on the MMU configuration (page size, TTBR range,
8814 * etc).
8815 *
8816 * Note that this is NOT "the nested pmap" (which is used to nest the
8817 * shared cache).
8818 *
8819 * Note that we update parameters of the entry for our unique needs (NG
8820 * entry, etc.).
8821 */
8822 sharedpage_pmap = pmap_create(NULL, 0x0, FALSE);
8823 assert(sharedpage_pmap != NULL);
8824
8825 /* The user 64-bit mapping... */
8826 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
8827 assert(kr == KERN_SUCCESS);
8828 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
8829
8830 /* ...and the user 32-bit mapping. */
8831 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
8832 assert(kr == KERN_SUCCESS);
8833 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
8834
8835 /* For manipulation in kernel, go straight to physical page */
8836 sharedpage_rw_addr = phystokv(pa);
8837 return((vm_map_address_t)sharedpage_rw_addr);
8838 }
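
/*
 * Sketch of how the dedicated commpage pmap created above is consumed
 * (illustrative only; task_ledger and is_64bit are hypothetical, and the block
 * is not compiled).  Each user pmap receives the commpage by having the
 * sharedpage_pmap translation tables nested into it.
 */
#if 0
static pmap_t
pmap_sharedpage_example(ledger_t task_ledger, boolean_t is_64bit)
{
	pmap_t task_pmap;

	task_pmap = pmap_create(task_ledger, 0x0, is_64bit);
	assert(task_pmap != NULL);

	/* Splice the commpage translation tables into the new pmap. */
	pmap_insert_sharedpage(task_pmap);

	return task_pmap;
}
#endif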
8839
8840 /*
8841 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
8842 * with user controlled TTEs.
8843 */
8844 #if (ARM_PGSHIFT == 14) || __ARM64_TWO_LEVEL_PMAP__
8845 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
8846 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
8847 #elif (ARM_PGSHIFT == 12)
8848 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
8849 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
8850 #else
8851 #error Nested shared page mapping is unsupported on this config
8852 #endif
8853
8854 static void
8855 pmap_insert_sharedpage_internal(
8856 pmap_t pmap)
8857 {
8858 #if (ARM_PGSHIFT == 14) && !__ARM64_TWO_LEVEL_PMAP__
8859 kern_return_t kr;
8860 #endif
8861 vm_offset_t sharedpage_vaddr;
8862 pt_entry_t *ttep, *src_ttep;
8863 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
8864 #error We assume a single page.
8865 #endif
8866
8867 if (pmap_is_64bit(pmap)) {
8868 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
8869 } else {
8870 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
8871 }
8872
8873 PMAP_LOCK(pmap);
8874
8875 /*
8876 * For 4KB pages, we can force the commpage to nest at the level one
8877 * page table, as each entry is 1GB (i.e., there will be no overlap
8878 * with regular userspace mappings). For 16KB pages, each level one
8879 * entry is 64GB, so we must go to the second level entry (32MB) in
8880 * order to nest.
8881 */
8882 #if (ARM_PGSHIFT == 12)
8883 #if __ARM64_TWO_LEVEL_PMAP__
8884 #error A two level page table with a page shift of 12 is not currently supported
8885 #endif
8886 /* Just slam in the L1 entry. */
8887 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
8888
8889 if (*ttep != ARM_PTE_EMPTY) {
8890 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
8891 }
8892
8893 src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
8894 #elif (ARM_PGSHIFT == 14)
8895 #if !__ARM64_TWO_LEVEL_PMAP__
8896 /* Allocate for the L2 entry if necessary, and slam it into place. */
8897 /*
8898 * As long as we are using a three-level page table, the first level
8899 * should always exist, so we don't need to check for it.
8900 */
8901 while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
8902 PMAP_UNLOCK(pmap);
8903
8904 kr = pmap_expand(pmap, sharedpage_vaddr, 0, PMAP_TT_L2_LEVEL);
8905
8906 if (kr != KERN_SUCCESS) {
8907 panic("Failed to pmap_expand for 32-bit commpage, pmap=%p", pmap);
8908 }
8909
8910 PMAP_LOCK(pmap);
8911 }
8912 #endif
8913
8914 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
8915
8916 if (*ttep != ARM_PTE_EMPTY) {
8917 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
8918 }
8919
8920 src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
8921 #endif
8922
8923 *ttep = *src_ttep;
8924 #ifndef __ARM_L1_PTW__
8925 CleanPoU_DcacheRegion((vm_offset_t) ttep, sizeof(tt_entry_t));
8926 #endif
8927 /* TODO: Should we flush in the 64-bit case? */
8928 flush_mmu_tlb_region(sharedpage_vaddr, PAGE_SIZE);
8929
8930 #if (ARM_PGSHIFT == 12) && !__ARM64_TWO_LEVEL_PMAP__
8931 flush_mmu_tlb_entry(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->asid));
8932 #elif (ARM_PGSHIFT == 14)
8933 flush_mmu_tlb_entry(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
8934 #endif
8935
8936 PMAP_UNLOCK(pmap);
8937 }
8938
8939 static void
8940 pmap_sharedpage_flush_32_to_64(
8941 void)
8942 {
8943 flush_mmu_tlb_region(_COMM_PAGE32_BASE_ADDRESS, PAGE_SIZE);
8944 }
8945
8946 static void
8947 pmap_unmap_sharedpage(
8948 pmap_t pmap)
8949 {
8950 pt_entry_t *ttep;
8951 vm_offset_t sharedpage_vaddr;
8952
8953 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
8954 #error We assume a single page.
8955 #endif
8956
8957 if (pmap_is_64bit(pmap)) {
8958 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
8959 } else {
8960 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
8961 }
8962
8963 #if (ARM_PGSHIFT == 12)
8964 #if __ARM64_TWO_LEVEL_PMAP__
8965 #error A two level page table with a page shift of 12 is not currently supported
8966 #endif
8967 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
8968
8969 if (ttep == NULL) {
8970 return;
8971 }
8972
8973 /* It had better be mapped to the shared page */
8974 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
8975 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
8976 }
8977 #elif (ARM_PGSHIFT == 14)
8978 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
8979
8980 if (ttep == NULL) {
8981 return;
8982 }
8983
8984 /* It had better be mapped to the shared page */
8985 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
8986 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
8987 }
8988 #endif
8989
8990 *ttep = ARM_TTE_EMPTY;
8991 flush_mmu_tlb_region(sharedpage_vaddr, PAGE_SIZE);
8992
8993 #if (ARM_PGSHIFT == 12)
8994 #if __ARM64_TWO_LEVEL_PMAP__
8995 #error A two level page table with a page shift of 12 is not currently supported
8996 #endif
8997 flush_mmu_tlb_entry(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->asid));
8998 #elif (ARM_PGSHIFT == 14)
8999 flush_mmu_tlb_entry(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
9000 #endif
9001 }
9002
9003 void
9004 pmap_insert_sharedpage(
9005 pmap_t pmap)
9006 {
9007 pmap_insert_sharedpage_internal(pmap);
9008 }
9009
9010 static boolean_t
9011 pmap_is_64bit(
9012 pmap_t pmap)
9013 {
9014 return (pmap->is_64bit);
9015 }
9016
9017 #endif
9018
9019 /* ARMTODO -- an implementation that accounts for
9020 * holes in the physical map, if any.
9021 */
9022 boolean_t
9023 pmap_valid_page(
9024 ppnum_t pn) {
9025 return pa_valid(ptoa(pn));
9026 }
9027
9028 static boolean_t
9029 pmap_is_empty_internal(
9030 pmap_t pmap,
9031 vm_map_offset_t va_start,
9032 vm_map_offset_t va_end)
9033 {
9034 vm_map_offset_t block_start, block_end;
9035 tt_entry_t *tte_p;
9036
9037 if (pmap == NULL) {
9038 return TRUE;
9039 }
9040
9041 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9042 PMAP_LOCK(pmap);
9043 }
9044
9045 #if (__ARM_VMSA__ == 7)
9046 if (tte_index(pmap, va_end) >= pmap->tte_index_max) {
9047 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9048 PMAP_UNLOCK(pmap);
9049 }
9050 return TRUE;
9051 }
9052
9053 block_start = va_start;
9054 tte_p = pmap_tte(pmap, block_start);
9055 while (block_start < va_end) {
9056 block_end = (block_start + ARM_TT_L1_SIZE) & ~(ARM_TT_L1_OFFMASK);
9057 if (block_end > va_end)
9058 block_end = va_end;
9059
9060 if ((*tte_p & ARM_TTE_TYPE_MASK) != 0) {
9061 vm_map_offset_t offset;
9062 ppnum_t phys_page = 0;
9063
9064 for (offset = block_start;
9065 offset < block_end;
9066 offset += ARM_PGBYTES) {
9067 // This does a pmap_find_phys() lookup but assumes lock is held
9068 phys_page = pmap_vtophys(pmap, offset);
9069 if (phys_page) {
9070 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9071 PMAP_UNLOCK(pmap);
9072 }
9073 return FALSE;
9074 }
9075 }
9076 }
9077
9078 block_start = block_end;
9079 tte_p++;
9080 }
9081 #else
9082 block_start = va_start;
9083
9084 while (block_start < va_end) {
9085 pt_entry_t *bpte_p, *epte_p;
9086 pt_entry_t *pte_p;
9087
9088 block_end = (block_start + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK;
9089 if (block_end > va_end)
9090 block_end = va_end;
9091
9092 tte_p = pmap_tt2e(pmap, block_start);
9093 if ((tte_p != PT_ENTRY_NULL)
9094 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
9095
9096 pte_p = (pt_entry_t *) ttetokv(*tte_p);
9097 bpte_p = &pte_p[tt3_index(pmap, block_start)];
9098 epte_p = bpte_p + (((block_end - block_start) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
9099
9100 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
9101 if (*pte_p != ARM_PTE_EMPTY) {
9102 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9103 PMAP_UNLOCK(pmap);
9104 }
9105 return FALSE;
9106 }
9107 }
9108 }
9109 block_start = block_end;
9110 }
9111 #endif
9112
9113 if ((pmap != kernel_pmap) && (not_in_kdp)) {
9114 PMAP_UNLOCK(pmap);
9115 }
9116
9117 return TRUE;
9118 }
9119
9120 boolean_t
9121 pmap_is_empty(
9122 pmap_t pmap,
9123 vm_map_offset_t va_start,
9124 vm_map_offset_t va_end)
9125 {
9126 return pmap_is_empty_internal(pmap, va_start, va_end);
9127 }
9128
9129 vm_map_offset_t pmap_max_offset(
9130 boolean_t is64 __unused,
9131 unsigned int option)
9132 {
9133 vm_map_offset_t max_offset_ret = 0;
9134
9135 #if defined(__arm64__)
9136 assert (is64);
9137 vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
9138 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
9139 max_offset_ret = arm64_pmap_max_offset_default;
9140 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
9141 max_offset_ret = min_max_offset;
9142 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
9143 max_offset_ret = MACH_VM_MAX_ADDRESS;
9144 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
9145 if (arm64_pmap_max_offset_default) {
9146 max_offset_ret = arm64_pmap_max_offset_default;
9147 } else if (max_mem > 0xC0000000) {
9148 max_offset_ret = 0x0000000318000000ULL; // Max offset is 12.375GB for devices with > 3GB of memory
9149 } else if (max_mem > 0x40000000) {
9150 max_offset_ret = 0x0000000218000000ULL; // Max offset is 8.375GB for devices with > 1GB and <= 3GB of memory
9151 } else {
9152 max_offset_ret = min_max_offset;
9153 }
9154 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
9155 if (arm64_pmap_max_offset_default) {
9156 // Allow the boot-arg to override jumbo size
9157 max_offset_ret = arm64_pmap_max_offset_default;
9158 } else {
9159 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is MACH_VM_MAX_ADDRESS for pmaps with special "jumbo" blessing
9160 }
9161 } else {
9162 panic("pmap_max_offset illegal option 0x%x\n", option);
9163 }
9164
9165 assert(max_offset_ret >= min_max_offset);
9166 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
9167 return max_offset_ret;
9168 #else
9169 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
9170 max_offset_ret = arm_pmap_max_offset_default;
9171 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
9172 max_offset_ret = 0x66000000;
9173 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
9174 max_offset_ret = VM_MAX_ADDRESS;
9175 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
9176 if (arm_pmap_max_offset_default) {
9177 max_offset_ret = arm_pmap_max_offset_default;
9178 } else if (max_mem > 0x20000000) {
9179 max_offset_ret = 0x80000000;
9180 } else {
9181 max_offset_ret = 0x66000000;
9182 }
9183 } else {
9184 panic("pmap_max_offset illegal option 0x%x\n", option);
9185 }
9186
9187 assert(max_offset_ret <= VM_MAX_ADDRESS);
9188 return max_offset_ret;
9189 #endif
9190 }
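
/*
 * Usage sketch (illustrative only; the option chosen here is just an example,
 * and the block is not compiled).  The VM map layer consults pmap_max_offset()
 * when sizing a new user address space, e.g. to pick the device-dependent
 * maximum for an ordinary task.
 */
#if 0
static vm_map_offset_t
pmap_max_offset_example(void)
{
#if defined(__arm64__)
	return pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	return pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#endif
}
#endif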
9191
9192 #if CONFIG_DTRACE
9193 /*
9194 * Constrain DTrace copyin/copyout actions
9195 */
9196 extern kern_return_t dtrace_copyio_preflight(addr64_t);
9197 extern kern_return_t dtrace_copyio_postflight(addr64_t);
9198
9199 kern_return_t dtrace_copyio_preflight(
9200 __unused addr64_t va)
9201 {
9202 if (current_map() == kernel_map)
9203 return KERN_FAILURE;
9204 else
9205 return KERN_SUCCESS;
9206 }
9207
9208 kern_return_t dtrace_copyio_postflight(
9209 __unused addr64_t va)
9210 {
9211 return KERN_SUCCESS;
9212 }
9213 #endif /* CONFIG_DTRACE */
9214
9215
9216 void
9217 pmap_flush_context_init(__unused pmap_flush_context *pfc)
9218 {
9219 }
9220
9221
9222 void
9223 pmap_flush(
9224 __unused pmap_flush_context *cpus_to_flush)
9225 {
9226 /* not implemented yet */
9227 return;
9228 }
9229
9230 static boolean_t
9231 pmap_query_resident_internal(
9232 pmap_t pmap,
9233 vm_map_address_t start,
9234 vm_map_address_t end,
9235 mach_vm_size_t *resident_bytes_p,
9236 mach_vm_size_t *compressed_bytes_p)
9237 {
9238 mach_vm_size_t resident_bytes = 0;
9239 mach_vm_size_t compressed_bytes = 0;
9240
9241 pt_entry_t *bpte, *epte;
9242 pt_entry_t *pte_p;
9243 tt_entry_t *tte_p;
9244
9245 if (pmap == NULL) {
9246 return FALSE;
9247 }
9248
9249 /* Ensure that this request is valid, and addresses exactly one TTE. */
9250 assert(!(start % ARM_PGBYTES));
9251 assert(!(end % ARM_PGBYTES));
9252 assert(end >= start);
9253 assert((end - start) <= (PTE_PGENTRIES * ARM_PGBYTES));
9254
9255 PMAP_LOCK(pmap);
9256 tte_p = pmap_tte(pmap, start);
9257 if (tte_p == (tt_entry_t *) NULL) {
9258 PMAP_UNLOCK(pmap);
9259 return FALSE;
9260 }
9261 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
9262
9263 #if (__ARM_VMSA__ == 7)
9264 pte_p = (pt_entry_t *) ttetokv(*tte_p);
9265 bpte = &pte_p[ptenum(start)];
9266 epte = bpte + atop(end - start);
9267 #else
9268 pte_p = (pt_entry_t *) ttetokv(*tte_p);
9269 bpte = &pte_p[tt3_index(pmap, start)];
9270 epte = bpte + ((end - start) >> ARM_TT_L3_SHIFT);
9271 #endif
9272
9273 for (; bpte < epte; bpte++) {
9274 if (ARM_PTE_IS_COMPRESSED(*bpte)) {
9275 compressed_bytes += ARM_PGBYTES;
9276 } else if (pa_valid(pte_to_pa(*bpte))) {
9277 resident_bytes += ARM_PGBYTES;
9278 }
9279 }
9280 }
9281 PMAP_UNLOCK(pmap);
9282
9283 if (compressed_bytes_p) {
9284 *compressed_bytes_p += compressed_bytes;
9285 }
9286
9287 if (resident_bytes_p) {
9288 *resident_bytes_p += resident_bytes;
9289 }
9290
9291 return TRUE;
9292 }
9293
9294 mach_vm_size_t
9295 pmap_query_resident(
9296 pmap_t pmap,
9297 vm_map_address_t start,
9298 vm_map_address_t end,
9299 mach_vm_size_t *compressed_bytes_p)
9300 {
9301 mach_vm_size_t resident_bytes;
9302 mach_vm_size_t compressed_bytes;
9303 vm_map_address_t va;
9304
9305
9306 if (pmap == PMAP_NULL) {
9307 if (compressed_bytes_p) {
9308 *compressed_bytes_p = 0;
9309 }
9310 return 0;
9311 }
9312
9313 resident_bytes = 0;
9314 compressed_bytes = 0;
9315
9316 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
9317 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
9318 VM_KERNEL_ADDRHIDE(end));
9319
9320 va = start;
9321 while (va < end) {
9322 vm_map_address_t l;
9323
9324 l = ((va + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);
9325
9326 if (l > end)
9327 l = end;
9328 if (!pmap_query_resident_internal(pmap, va, l, &resident_bytes, compressed_bytes_p)) {
9329 break;
9330 }
9331
9332 va = l;
9333 }
9334
9335 if (compressed_bytes_p) {
9336 *compressed_bytes_p = compressed_bytes;
9337 }
9338
9339 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
9340 resident_bytes);
9341
9342 return resident_bytes;
9343 }
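
/*
 * Usage sketch (illustrative only; map_pmap, start and end are hypothetical,
 * and the block is not compiled).  Callers such as the VM footprint code use
 * pmap_query_resident() to account resident and compressor-held bytes for a
 * virtual address range.
 */
#if 0
static void
pmap_query_resident_example(pmap_t map_pmap, vm_map_address_t start,
			    vm_map_address_t end)
{
	mach_vm_size_t resident_bytes;
	mach_vm_size_t compressed_bytes = 0;

	resident_bytes = pmap_query_resident(map_pmap, start, end, &compressed_bytes);

	kprintf("range 0x%llx-0x%llx: %llu bytes resident, %llu bytes compressed\n",
	    (uint64_t)start, (uint64_t)end,
	    (uint64_t)resident_bytes, (uint64_t)compressed_bytes);
}
#endif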
9344
9345 #if MACH_ASSERT
9346 extern int pmap_ledgers_panic;
9347 static void
9348 pmap_check_ledgers(
9349 pmap_t pmap)
9350 {
9351 ledger_amount_t bal;
9352 int pid;
9353 char *procname;
9354 boolean_t do_panic;
9355
9356 if (pmap->pmap_pid == 0) {
9357 /*
9358 * This pmap was not or is no longer fully associated
9359 * with a task (e.g. the old pmap after a fork()/exec() or
9360 * spawn()). Its "ledger" still points at a task that is
9361 * now using a different (and active) address space, so
9362 * we can't check that all the pmap ledgers are balanced here.
9363 *
9364 * If the "pid" is set, that means that we went through
9365 * pmap_set_process() in task_terminate_internal(), so
9366 * this task's ledger should not have been re-used and
9367 * all the pmap ledgers should be back to 0.
9368 */
9369 return;
9370 }
9371
9372 do_panic = FALSE;
9373 pid = pmap->pmap_pid;
9374 procname = pmap->pmap_procname;
9375
9376 pmap_ledgers_drift.num_pmaps_checked++;
9377
9378 ledger_get_balance(pmap->ledger,
9379 task_ledgers.phys_footprint,
9380 &bal);
9381 if (bal != 0) {
9382 #if DEVELOPMENT || DEBUG
9383 // if (!pmap->footprint_was_suspended)
9384 #endif /* DEVELOPMENT || DEBUG */
9385 do_panic = TRUE;
9386 printf("LEDGER BALANCE proc %d (%s) "
9387 "\"phys_footprint\" = %lld\n",
9388 pid, procname, bal);
9389 if (bal > 0) {
9390 pmap_ledgers_drift.phys_footprint_over++;
9391 pmap_ledgers_drift.phys_footprint_over_total += bal;
9392 if (bal > pmap_ledgers_drift.phys_footprint_over_max) {
9393 pmap_ledgers_drift.phys_footprint_over_max = bal;
9394 }
9395 } else {
9396 pmap_ledgers_drift.phys_footprint_under++;
9397 pmap_ledgers_drift.phys_footprint_under_total += bal;
9398 if (bal < pmap_ledgers_drift.phys_footprint_under_max) {
9399 pmap_ledgers_drift.phys_footprint_under_max = bal;
9400 }
9401 }
9402 }
9403 ledger_get_balance(pmap->ledger,
9404 task_ledgers.internal,
9405 &bal);
9406 if (bal != 0) {
9407 do_panic = TRUE;
9408 printf("LEDGER BALANCE proc %d (%s) "
9409 "\"internal\" = %lld\n",
9410 pid, procname, bal);
9411 if (bal > 0) {
9412 pmap_ledgers_drift.internal_over++;
9413 pmap_ledgers_drift.internal_over_total += bal;
9414 if (bal > pmap_ledgers_drift.internal_over_max) {
9415 pmap_ledgers_drift.internal_over_max = bal;
9416 }
9417 } else {
9418 pmap_ledgers_drift.internal_under++;
9419 pmap_ledgers_drift.internal_under_total += bal;
9420 if (bal < pmap_ledgers_drift.internal_under_max) {
9421 pmap_ledgers_drift.internal_under_max = bal;
9422 }
9423 }
9424 }
9425 ledger_get_balance(pmap->ledger,
9426 task_ledgers.internal_compressed,
9427 &bal);
9428 if (bal != 0) {
9429 do_panic = TRUE;
9430 printf("LEDGER BALANCE proc %d (%s) "
9431 "\"internal_compressed\" = %lld\n",
9432 pid, procname, bal);
9433 if (bal > 0) {
9434 pmap_ledgers_drift.internal_compressed_over++;
9435 pmap_ledgers_drift.internal_compressed_over_total += bal;
9436 if (bal > pmap_ledgers_drift.internal_compressed_over_max) {
9437 pmap_ledgers_drift.internal_compressed_over_max = bal;
9438 }
9439 } else {
9440 pmap_ledgers_drift.internal_compressed_under++;
9441 pmap_ledgers_drift.internal_compressed_under_total += bal;
9442 if (bal < pmap_ledgers_drift.internal_compressed_under_max) {
9443 pmap_ledgers_drift.internal_compressed_under_max = bal;
9444 }
9445 }
9446 }
9447 ledger_get_balance(pmap->ledger,
9448 task_ledgers.iokit_mapped,
9449 &bal);
9450 if (bal != 0) {
9451 do_panic = TRUE;
9452 printf("LEDGER BALANCE proc %d (%s) "
9453 "\"iokit_mapped\" = %lld\n",
9454 pid, procname, bal);
9455 if (bal > 0) {
9456 pmap_ledgers_drift.iokit_mapped_over++;
9457 pmap_ledgers_drift.iokit_mapped_over_total += bal;
9458 if (bal > pmap_ledgers_drift.iokit_mapped_over_max) {
9459 pmap_ledgers_drift.iokit_mapped_over_max = bal;
9460 }
9461 } else {
9462 pmap_ledgers_drift.iokit_mapped_under++;
9463 pmap_ledgers_drift.iokit_mapped_under_total += bal;
9464 if (bal < pmap_ledgers_drift.iokit_mapped_under_max) {
9465 pmap_ledgers_drift.iokit_mapped_under_max = bal;
9466 }
9467 }
9468 }
9469 ledger_get_balance(pmap->ledger,
9470 task_ledgers.alternate_accounting,
9471 &bal);
9472 if (bal != 0) {
9473 do_panic = TRUE;
9474 printf("LEDGER BALANCE proc %d (%s) "
9475 "\"alternate_accounting\" = %lld\n",
9476 pid, procname, bal);
9477 if (bal > 0) {
9478 pmap_ledgers_drift.alternate_accounting_over++;
9479 pmap_ledgers_drift.alternate_accounting_over_total += bal;
9480 if (bal > pmap_ledgers_drift.alternate_accounting_over_max) {
9481 pmap_ledgers_drift.alternate_accounting_over_max = bal;
9482 }
9483 } else {
9484 pmap_ledgers_drift.alternate_accounting_under++;
9485 pmap_ledgers_drift.alternate_accounting_under_total += bal;
9486 if (bal < pmap_ledgers_drift.alternate_accounting_under_max) {
9487 pmap_ledgers_drift.alternate_accounting_under_max = bal;
9488 }
9489 }
9490 }
9491 ledger_get_balance(pmap->ledger,
9492 task_ledgers.alternate_accounting_compressed,
9493 &bal);
9494 if (bal != 0) {
9495 do_panic = TRUE;
9496 printf("LEDGER BALANCE proc %d (%s) "
9497 "\"alternate_accounting_compressed\" = %lld\n",
9498 pid, procname, bal);
9499 if (bal > 0) {
9500 pmap_ledgers_drift.alternate_accounting_compressed_over++;
9501 pmap_ledgers_drift.alternate_accounting_compressed_over_total += bal;
9502 if (bal > pmap_ledgers_drift.alternate_accounting_compressed_over_max) {
9503 pmap_ledgers_drift.alternate_accounting_compressed_over_max = bal;
9504 }
9505 } else {
9506 pmap_ledgers_drift.alternate_accounting_compressed_under++;
9507 pmap_ledgers_drift.alternate_accounting_compressed_under_total += bal;
9508 if (bal < pmap_ledgers_drift.alternate_accounting_compressed_under_max) {
9509 pmap_ledgers_drift.alternate_accounting_compressed_under_max = bal;
9510 }
9511 }
9512 }
9513 ledger_get_balance(pmap->ledger,
9514 task_ledgers.page_table,
9515 &bal);
9516 if (bal != 0) {
9517 do_panic = TRUE;
9518 printf("LEDGER BALANCE proc %d (%s) "
9519 "\"page_table\" = %lld\n",
9520 pid, procname, bal);
9521 if (bal > 0) {
9522 pmap_ledgers_drift.page_table_over++;
9523 pmap_ledgers_drift.page_table_over_total += bal;
9524 if (bal > pmap_ledgers_drift.page_table_over_max) {
9525 pmap_ledgers_drift.page_table_over_max = bal;
9526 }
9527 } else {
9528 pmap_ledgers_drift.page_table_under++;
9529 pmap_ledgers_drift.page_table_under_total += bal;
9530 if (bal < pmap_ledgers_drift.page_table_under_max) {
9531 pmap_ledgers_drift.page_table_under_max = bal;
9532 }
9533 }
9534 }
9535 ledger_get_balance(pmap->ledger,
9536 task_ledgers.purgeable_volatile,
9537 &bal);
9538 if (bal != 0) {
9539 do_panic = TRUE;
9540 printf("LEDGER BALANCE proc %d (%s) "
9541 "\"purgeable_volatile\" = %lld\n",
9542 pid, procname, bal);
9543 if (bal > 0) {
9544 pmap_ledgers_drift.purgeable_volatile_over++;
9545 pmap_ledgers_drift.purgeable_volatile_over_total += bal;
9546 if (bal > pmap_ledgers_drift.purgeable_volatile_over_max) {
9547 pmap_ledgers_drift.purgeable_volatile_over_max = bal;
9548 }
9549 } else {
9550 pmap_ledgers_drift.purgeable_volatile_under++;
9551 pmap_ledgers_drift.purgeable_volatile_under_total += bal;
9552 if (bal < pmap_ledgers_drift.purgeable_volatile_under_max) {
9553 pmap_ledgers_drift.purgeable_volatile_under_max = bal;
9554 }
9555 }
9556 }
9557 ledger_get_balance(pmap->ledger,
9558 task_ledgers.purgeable_nonvolatile,
9559 &bal);
9560 if (bal != 0) {
9561 do_panic = TRUE;
9562 printf("LEDGER BALANCE proc %d (%s) "
9563 "\"purgeable_nonvolatile\" = %lld\n",
9564 pid, procname, bal);
9565 if (bal > 0) {
9566 pmap_ledgers_drift.purgeable_nonvolatile_over++;
9567 pmap_ledgers_drift.purgeable_nonvolatile_over_total += bal;
9568 if (bal > pmap_ledgers_drift.purgeable_nonvolatile_over_max) {
9569 pmap_ledgers_drift.purgeable_nonvolatile_over_max = bal;
9570 }
9571 } else {
9572 pmap_ledgers_drift.purgeable_nonvolatile_under++;
9573 pmap_ledgers_drift.purgeable_nonvolatile_under_total += bal;
9574 if (bal < pmap_ledgers_drift.purgeable_nonvolatile_under_max) {
9575 pmap_ledgers_drift.purgeable_nonvolatile_under_max = bal;
9576 }
9577 }
9578 }
9579 ledger_get_balance(pmap->ledger,
9580 task_ledgers.purgeable_volatile_compressed,
9581 &bal);
9582 if (bal != 0) {
9583 do_panic = TRUE;
9584 printf("LEDGER BALANCE proc %d (%s) "
9585 "\"purgeable_volatile_compressed\" = %lld\n",
9586 pid, procname, bal);
9587 if (bal > 0) {
9588 pmap_ledgers_drift.purgeable_volatile_compressed_over++;
9589 pmap_ledgers_drift.purgeable_volatile_compressed_over_total += bal;
9590 if (bal > pmap_ledgers_drift.purgeable_volatile_compressed_over_max) {
9591 pmap_ledgers_drift.purgeable_volatile_compressed_over_max = bal;
9592 }
9593 } else {
9594 pmap_ledgers_drift.purgeable_volatile_compressed_under++;
9595 pmap_ledgers_drift.purgeable_volatile_compressed_under_total += bal;
9596 if (bal < pmap_ledgers_drift.purgeable_volatile_compressed_under_max) {
9597 pmap_ledgers_drift.purgeable_volatile_compressed_under_max = bal;
9598 }
9599 }
9600 }
9601 ledger_get_balance(pmap->ledger,
9602 task_ledgers.purgeable_nonvolatile_compressed,
9603 &bal);
9604 if (bal != 0) {
9605 do_panic = TRUE;
9606 printf("LEDGER BALANCE proc %d (%s) "
9607 "\"purgeable_nonvolatile_compressed\" = %lld\n",
9608 pid, procname, bal);
9609 if (bal > 0) {
9610 pmap_ledgers_drift.purgeable_nonvolatile_compressed_over++;
9611 pmap_ledgers_drift.purgeable_nonvolatile_compressed_over_total += bal;
9612 if (bal > pmap_ledgers_drift.purgeable_nonvolatile_compressed_over_max) {
9613 pmap_ledgers_drift.purgeable_nonvolatile_compressed_over_max = bal;
9614 }
9615 } else {
9616 pmap_ledgers_drift.purgeable_nonvolatile_compressed_under++;
9617 pmap_ledgers_drift.purgeable_nonvolatile_compressed_under_total += bal;
9618 if (bal < pmap_ledgers_drift.purgeable_nonvolatile_compressed_under_max) {
9619 pmap_ledgers_drift.purgeable_nonvolatile_compressed_under_max = bal;
9620 }
9621 }
9622 }
9623
9624 if (do_panic) {
9625 if (pmap_ledgers_panic &&
9626 pmap->pmap_stats_assert) {
9627 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
9628 pmap, pid, procname);
9629 } else {
9630 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
9631 pmap, pid, procname);
9632 }
9633 }
9634
9635 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
9636 #if 00
9637 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
9638 #endif
9639 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
9640 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
9641 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
9642 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
9643 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
9644 }
9645 #endif /* MACH_ASSERT */
9646
9647 void pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a) {
9648 }
9649
9650
9651 #if CONFIG_PGTRACE
9652 #define PROF_START uint64_t t, nanot;\
9653 t = mach_absolute_time();
9654
9655 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
9656 kprintf("%s: took %llu ns\n", __func__, nanot);
9657
9658 #define PMAP_PGTRACE_LOCK(p) \
9659 do { \
9660 *(p) = ml_set_interrupts_enabled(false); \
9661 if (simple_lock_try(&(pmap_pgtrace.lock))) break; \
9662 ml_set_interrupts_enabled(*(p)); \
9663 } while (true)
9664
9665 #define PMAP_PGTRACE_UNLOCK(p) \
9666 do { \
9667 simple_unlock(&(pmap_pgtrace.lock)); \
9668 ml_set_interrupts_enabled(*(p)); \
9669 } while (0)
9670
9671 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
9672 do { \
9673 *(pte_p) = (pte_entry); \
9674 FLUSH_PTE(pte_p); \
9675 } while (0)
9676
9677 #define PGTRACE_MAX_MAP 16 // maximum supported va to same pa
9678
9679 typedef enum {
9680 UNDEFINED,
9681 PA_UNDEFINED,
9682 VA_UNDEFINED,
9683 DEFINED
9684 } pmap_pgtrace_page_state_t;
9685
9686 typedef struct {
9687 queue_chain_t chain;
9688
9689 /*
9690 pa - physical address being traced
9691 maps - list of va maps to the pa above
9692 map_pool - map pool
9693 map_waste - waste can
9694 state - state
9695 */
9696 pmap_paddr_t pa;
9697 queue_head_t maps;
9698 queue_head_t map_pool;
9699 queue_head_t map_waste;
9700 pmap_pgtrace_page_state_t state;
9701 } pmap_pgtrace_page_t;
9702
9703 static struct {
9704 /*
9705 pages - list of tracing page info
9706 */
9707 queue_head_t pages;
9708 decl_simple_lock_data(, lock);
9709 } pmap_pgtrace = {};
9710
9711 static void pmap_pgtrace_init(void)
9712 {
9713 queue_init(&(pmap_pgtrace.pages));
9714 simple_lock_init(&(pmap_pgtrace.lock), 0);
9715
9716 boolean_t enabled;
9717
9718 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
9719 pgtrace_enabled = enabled;
9720 }
9721 }
9722
9723 // find a page with given pa - pmap_pgtrace should be locked
9724 inline static pmap_pgtrace_page_t *pmap_pgtrace_find_page(pmap_paddr_t pa)
9725 {
9726 queue_head_t *q = &(pmap_pgtrace.pages);
9727 pmap_pgtrace_page_t *p;
9728
9729 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
9730 if (p->state == UNDEFINED) {
9731 continue;
9732 }
9733 if (p->state == PA_UNDEFINED) {
9734 continue;
9735 }
9736 if (p->pa == pa) {
9737 return p;
9738 }
9739 }
9740
9741 return NULL;
9742 }
9743
9744 // enter clone of given pmap, va page and range - pmap should be locked
9745 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
9746 {
9747 bool ints;
9748 queue_head_t *q = &(pmap_pgtrace.pages);
9749 pmap_paddr_t pa_page;
9750 pt_entry_t *ptep, *cptep;
9751 pmap_pgtrace_page_t *p;
9752 bool found = false;
9753
9754 PMAP_ASSERT_LOCKED(pmap);
9755 assert(va_page == arm_trunc_page(va_page));
9756
9757 PMAP_PGTRACE_LOCK(&ints);
9758
9759 ptep = pmap_pte(pmap, va_page);
9760
9761 // target pte should exist
9762 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
9763 PMAP_PGTRACE_UNLOCK(&ints);
9764 return false;
9765 }
9766
9767 queue_head_t *mapq;
9768 queue_head_t *mappool;
9769 pmap_pgtrace_map_t *map = NULL;
9770
9771 pa_page = pte_to_pa(*ptep);
9772
9773 // find if we have a page info defined for this
9774 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
9775 mapq = &(p->maps);
9776 mappool = &(p->map_pool);
9777
9778 switch (p->state) {
9779 case PA_UNDEFINED:
9780 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
9781 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
9782 p->pa = pa_page;
9783 map->range.start = start;
9784 map->range.end = end;
9785 found = true;
9786 break;
9787 }
9788 }
9789 break;
9790
9791 case VA_UNDEFINED:
9792 if (p->pa != pa_page) {
9793 break;
9794 }
9795 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
9796 if (map->cloned == false) {
9797 map->pmap = pmap;
9798 map->ova = va_page;
9799 map->range.start = start;
9800 map->range.end = end;
9801 found = true;
9802 break;
9803 }
9804 }
9805 break;
9806
9807 case DEFINED:
9808 if (p->pa != pa_page) {
9809 break;
9810 }
9811 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
9812 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
9813 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
9814 break;
9815 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
9816 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
9817 break;
9818 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
9819 // range should be already defined as well
9820 found = true;
9821 break;
9822 }
9823 }
9824 break;
9825
9826 default:
9827 panic("invalid state p->state=%x\n", p->state);
9828 }
9829
9830 if (found == true) {
9831 break;
9832 }
9833 }
9834
9835 // do not clone if no page info found
9836 if (found == false) {
9837 PMAP_PGTRACE_UNLOCK(&ints);
9838 return false;
9839 }
9840
9841 // copy pre, target and post ptes to clone ptes
9842 for (int i = 0; i < 3; i++) {
9843 ptep = pmap_pte(pmap, va_page + (i-1)*ARM_PGBYTES);
9844 cptep = pmap_pte(kernel_pmap, map->cva[i]);
9845 assert(cptep != NULL);
9846 if (ptep == NULL) {
9847 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
9848 } else {
9849 PGTRACE_WRITE_PTE(cptep, *ptep);
9850 }
9851 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
9852 }
9853
9854 // get ptes for original and clone
9855 ptep = pmap_pte(pmap, va_page);
9856 cptep = pmap_pte(kernel_pmap, map->cva[1]);
9857
9858 // invalidate original pte and mark it as a pgtrace page
9859 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
9860 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova+ARM_PGBYTES);
9861
9862 map->cloned = true;
9863 p->state = DEFINED;
9864
9865 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
9866
9867 PMAP_PGTRACE_UNLOCK(&ints);
9868
9869 return true;
9870 }
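/*
 * Illustrative sketch of what pmap_pgtrace_enter_clone() sets up (derived
 * from the code above, not an additional interface):
 *
 *   original pmap                       kernel_pmap clone window
 *   va_page - ARM_PGBYTES   ---->       map->cva[0]   (copy of preceding pte)
 *   va_page                 ---->       map->cva[1]   (copy of traced pte)
 *   va_page + ARM_PGBYTES   ---->       map->cva[2]   (copy of following pte)
 *
 * The original pte for va_page is left with ARM_PTE_PGTRACE set and
 * ARM_PTE_TYPE_VALID cleared, so every access to the traced page faults and
 * is emulated against map->cva[1] by pmap_pgtrace_fault().
 */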
9871
9872 // This function removes the trace bit and revalidates the pte if applicable. The pmap must be locked.
9873 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
9874 {
9875 bool ints, found = false;
9876 pmap_pgtrace_page_t *p;
9877 pt_entry_t *ptep;
9878
9879 PMAP_PGTRACE_LOCK(&ints);
9880
9881 // we must have this page info
9882 p = pmap_pgtrace_find_page(pa);
9883 if (p == NULL) {
9884 goto unlock_exit;
9885 }
9886
9887 // find matching map
9888 queue_head_t *mapq = &(p->maps);
9889 queue_head_t *mappool = &(p->map_pool);
9890 pmap_pgtrace_map_t *map;
9891
9892 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
9893 if (map->pmap == pmap && map->ova == va) {
9894 found = true;
9895 break;
9896 }
9897 }
9898
9899 if (!found) {
9900 goto unlock_exit;
9901 }
9902
9903 if (map->cloned == true) {
9904 // Restore the pte to its original state
9905 ptep = pmap_pte(pmap, map->ova);
9906 assert(ptep);
9907 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
9908 PMAP_UPDATE_TLBS(pmap, va, va+ARM_PGBYTES);
9909
9910 // revert clone pages
9911 for (int i = 0; i < 3; i++) {
9912 ptep = pmap_pte(kernel_pmap, map->cva[i]);
9913 assert(ptep != NULL);
9914 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
9915 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
9916 }
9917 }
9918
9919 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
9920 map->pmap = NULL;
9921 map->ova = (vm_map_offset_t)NULL;
9922 map->cloned = false;
9923 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
9924
9925 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
9926
9927 unlock_exit:
9928 PMAP_PGTRACE_UNLOCK(&ints);
9929 }
9930
9931 // remove all clones of given pa - pmap must be locked
9932 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
9933 {
9934 bool ints;
9935 pmap_pgtrace_page_t *p;
9936 pt_entry_t *ptep;
9937
9938 PMAP_PGTRACE_LOCK(&ints);
9939
9940 // we must have this page info
9941 p = pmap_pgtrace_find_page(pa);
9942 if (p == NULL) {
9943 PMAP_PGTRACE_UNLOCK(&ints);
9944 return;
9945 }
9946
9947 queue_head_t *mapq = &(p->maps);
9948 queue_head_t *mappool = &(p->map_pool);
9949 queue_head_t *mapwaste = &(p->map_waste);
9950 pmap_pgtrace_map_t *map;
9951
9952 // move maps to waste
9953 while (!queue_empty(mapq)) {
9954 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
9955 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
9956 }
9957
9958 PMAP_PGTRACE_UNLOCK(&ints);
9959
9960 // sanitize maps in waste
9961 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
9962 if (map->cloned == true) {
9963 PMAP_LOCK(map->pmap);
9964
9965 // restore the original pte
9966 ptep = pmap_pte(map->pmap, map->ova);
9967 assert(ptep);
9968 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
9969 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova+ARM_PGBYTES);
9970
9971 // revert clone ptes
9972 for (int i = 0; i < 3; i++) {
9973 ptep = pmap_pte(kernel_pmap, map->cva[i]);
9974 assert(ptep != NULL);
9975 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
9976 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
9977 }
9978
9979 PMAP_UNLOCK(map->pmap);
9980 }
9981
9982 map->pmap = NULL;
9983 map->ova = (vm_map_offset_t)NULL;
9984 map->cloned = false;
9985 }
9986
9987 PMAP_PGTRACE_LOCK(&ints);
9988
9989 // recycle maps back to map_pool
9990 while (!queue_empty(mapwaste)) {
9991 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
9992 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
9993 }
9994
9995 PMAP_PGTRACE_UNLOCK(&ints);
9996 }
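/*
 * Note on pmap_pgtrace_remove_all_clone(): the maps are first moved to the
 * waste queue and the pgtrace lock is dropped before the per-map pmap locks
 * are taken to restore the original ptes; the sanitized maps are then
 * recycled back to map_pool under the pgtrace lock. This respects the
 * pmap-before-pgtrace lock ordering used elsewhere in this file.
 */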
9997
9998 inline static void pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
9999 {
10000 uint64_t tsz;
10001 vm_map_offset_t end;
10002
10003 if (pmap == kernel_pmap) {
10004 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
10005 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64-tsz)) << (64-tsz));
10006 *endp = VM_MAX_KERNEL_ADDRESS;
10007 } else {
10008 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
10009 if (tsz == 64) {
10010 end = 0;
10011 } else {
10012 end = ((uint64_t)1 << (64-tsz)) - 1;
10013 }
10014
10015 *startp = 0;
10016 *endp = end;
10017 }
10018
10019 assert(*endp > *startp);
10020
10021 return;
10022 }
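/*
 * Worked example of the TxSZ arithmetic above (illustrative numbers; the
 * actual TCR values depend on the configuration): with TxSZ == 25 the
 * address space is 64 - 25 = 39 bits wide, so for user pmaps
 *     end = (1ULL << 39) - 1 = 0x0000007fffffffff
 * and for the kernel the lowest T1 address is
 *     (UINT64_MAX >> 39) << 39 = 0xffffff8000000000,
 * with the start clamped from below by VM_MIN_KERNEL_ADDRESS.
 */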
10023
10024 // if pa is mapped in the given pmap, clone those mappings
10025 static uint64_t pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset) {
10026 uint64_t ret = 0;
10027 vm_map_offset_t min, max;
10028 vm_map_offset_t cur_page, end_page;
10029 pt_entry_t *ptep;
10030 tt_entry_t *ttep;
10031 tt_entry_t tte;
10032
10033 pmap_pgtrace_get_search_space(pmap, &min, &max);
10034
10035 cur_page = arm_trunc_page(min);
10036 end_page = arm_trunc_page(max);
10037 while (cur_page <= end_page) {
10038 vm_map_offset_t add = 0;
10039
10040 PMAP_LOCK(pmap);
10041
10042 // skip uninteresting address space
10043 if (pmap == kernel_pmap &&
10044 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
10045 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
10046 add = ARM_PGBYTES;
10047 goto unlock_continue;
10048 }
10049
10050 #if __ARM64_TWO_LEVEL_PMAP__
10051 // check whether we can skip l2
10052 ttep = pmap_tt2e(pmap, cur_page);
10053 assert(ttep);
10054 tte = *ttep;
10055 #else
10056 // check whether we can skip l1
10057 ttep = pmap_tt1e(pmap, cur_page);
10058 assert(ttep);
10059 tte = *ttep;
10060 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
10061 add = ARM_TT_L1_SIZE;
10062 goto unlock_continue;
10063 }
10064
10065 // check whether we can skip l2 as well
10066 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, cur_page)];
10067 #endif
10068 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
10069 add = ARM_TT_L2_SIZE;
10070 goto unlock_continue;
10071 }
10072
10073 // finally, get the ptep
10074 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, cur_page)]);
10075 if (ptep == PT_ENTRY_NULL) {
10076 add = ARM_TT_L3_SIZE;
10077 goto unlock_continue;
10078 }
10079
10080 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
10081 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
10082 ret++;
10083 }
10084 }
10085
10086 add = ARM_PGBYTES;
10087
10088 unlock_continue:
10089 PMAP_UNLOCK(pmap);
10090
10091 // overflow
10092 if (cur_page + add < cur_page) {
10093 break;
10094 }
10095
10096 cur_page += add;
10097 }
10098
10099
10100 return ret;
10101 }
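/*
 * Note on the scan above: when an L1/L2 table entry is not a valid table
 * descriptor, the whole region that entry would map is skipped in one step
 * (ARM_TT_L1_SIZE or ARM_TT_L2_SIZE, e.g. 1GB or 2MB with a 4KB translation
 * granule), so a sparse address space is covered much faster than a
 * page-by-page probe of every possible va.
 */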
10102
10103 // search pv table and clone vas of given pa
10104 static uint64_t pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
10105 {
10106 uint64_t ret = 0;
10107 unsigned long pai;
10108 pv_entry_t **pvh;
10109 pt_entry_t *ptep;
10110 pmap_t pmap;
10111
10112 typedef struct {
10113 queue_chain_t chain;
10114 pmap_t pmap;
10115 vm_map_offset_t va;
10116 } pmap_va_t;
10117
10118 queue_head_t pmapvaq;
10119 pmap_va_t *pmapva;
10120
10121 queue_init(&pmapvaq);
10122
10123 pai = pa_index(pa);
10124 LOCK_PVH(pai);
10125 pvh = pai_to_pvh(pai);
10126
10127 // collect pmap/va pair from pvh
10128 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
10129 ptep = pvh_ptep(pvh);
10130 pmap = ptep_get_pmap(ptep);
10131
10132 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
10133 pmapva->pmap = pmap;
10134 pmapva->va = ptep_get_va(ptep);
10135
10136 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
10137
10138 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
10139 pv_entry_t *pvep;
10140
10141 pvep = pvh_list(pvh);
10142 while (pvep) {
10143 ptep = pve_get_ptep(pvep);
10144 pmap = ptep_get_pmap(ptep);
10145
10146 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
10147 pmapva->pmap = pmap;
10148 pmapva->va = ptep_get_va(ptep);
10149
10150 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
10151
10152 pvep = PVE_NEXT_PTR(pve_next(pvep));
10153 }
10154 }
10155
10156 UNLOCK_PVH(pai);
10157
10158 // clone them while making sure mapping still exists
10159 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
10160 PMAP_LOCK(pmapva->pmap);
10161 ptep = pmap_pte(pmapva->pmap, pmapva->va);
10162 if (pte_to_pa(*ptep) == pa) {
10163 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
10164 ret++;
10165 }
10166 }
10167 PMAP_UNLOCK(pmapva->pmap);
10168
10169 kfree(pmapva, sizeof(pmap_va_t));
10170 }
10171
10172 return ret;
10173 }
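/*
 * Design note on the pv-table walk above: the (pmap, va) pairs are first
 * collected into a temporary queue while only the PVH lock is held, and the
 * actual cloning is done after that lock is dropped, taking each pmap lock
 * in turn. This avoids holding the PVH lock across pmap locks, and
 * pte_to_pa() is re-checked afterward in case the mapping changed in between.
 */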
10174
10175 // allocate a page info
10176 static pmap_pgtrace_page_t *pmap_pgtrace_alloc_page(void)
10177 {
10178 pmap_pgtrace_page_t *p;
10179 queue_head_t *mapq;
10180 queue_head_t *mappool;
10181 queue_head_t *mapwaste;
10182 pmap_pgtrace_map_t *map;
10183
10184 p = kalloc(sizeof(pmap_pgtrace_page_t));
10185 assert(p);
10186
10187 p->state = UNDEFINED;
10188
10189 mapq = &(p->maps);
10190 mappool = &(p->map_pool);
10191 mapwaste = &(p->map_waste);
10192 queue_init(mapq);
10193 queue_init(mappool);
10194 queue_init(mapwaste);
10195
10196 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
10197 vm_map_offset_t newcva;
10198 pt_entry_t *cptep;
10199 kern_return_t kr;
10200 vm_map_entry_t entry;
10201
10202 // get a clone va
10203 vm_object_reference(kernel_object);
10204 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3*ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
10205 if (kr != KERN_SUCCESS) {
10206 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
10207 }
10208 VME_OBJECT_SET(entry, kernel_object);
10209 VME_OFFSET_SET(entry, newcva);
10210 vm_map_unlock(kernel_map);
10211
10212 // fill default clone page info and add to pool
10213 map = kalloc(sizeof(pmap_pgtrace_map_t));
10214 for (int j = 0; j < 3; j++) {
10215 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
10216
10217 // pre-expand the pmap while preemption is still enabled
10218 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
10219 if (kr != KERN_SUCCESS) {
10220 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
10221 }
10222
10223 cptep = pmap_pte(kernel_pmap, addr);
10224 assert(cptep != NULL);
10225
10226 map->cva[j] = addr;
10227 map->cva_spte[j] = *cptep;
10228 }
10229 map->range.start = map->range.end = 0;
10230 map->cloned = false;
10231 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
10232 }
10233
10234 return p;
10235 }
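/*
 * Note on pmap_pgtrace_alloc_page(): each page info pre-allocates
 * PGTRACE_MAX_MAP clone windows of three contiguous kernel pages. The
 * kernel VA is reserved up front with vm_map_find_space() and the kernel
 * page tables are pre-expanded, so pmap_pgtrace_enter_clone() can later
 * populate the clone ptes under the pgtrace spinlock without allocating.
 */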
10236
10237 // free a page info
10238 static void pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
10239 {
10240 queue_head_t *mapq;
10241 queue_head_t *mappool;
10242 queue_head_t *mapwaste;
10243 pmap_pgtrace_map_t *map;
10244
10245 assert(p);
10246
10247 mapq = &(p->maps);
10248 mappool = &(p->map_pool);
10249 mapwaste = &(p->map_waste);
10250
10251 while (!queue_empty(mapq)) {
10252 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
10253 kfree(map, sizeof(pmap_pgtrace_map_t));
10254 }
10255
10256 while (!queue_empty(mappool)) {
10257 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
10258 kfree(map, sizeof(pmap_pgtrace_map_t));
10259 }
10260
10261 while (!queue_empty(mapwaste)) {
10262 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
10263 kfree(map, sizeof(pmap_pgtrace_map_t));
10264 }
10265
10266 kfree(p, sizeof(pmap_pgtrace_page_t));
10267 }
10268
10269 // construct page infos with the given address range
10270 int pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
10271 {
10272 int ret = 0;
10273 pt_entry_t *ptep;
10274 queue_head_t *q = &(pmap_pgtrace.pages);
10275 bool ints;
10276 vm_map_offset_t cur_page, end_page;
10277
10278 if (start > end) {
10279 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
10280 return -1;
10281 }
10282
10283 PROF_START
10284
10285 // add each page in given range
10286 cur_page = arm_trunc_page(start);
10287 end_page = arm_trunc_page(end);
10288 while (cur_page <= end_page) {
10289 pmap_paddr_t pa_page = 0;
10290 uint64_t num_cloned = 0;
10291 pmap_pgtrace_page_t *p = NULL, *newp;
10292 bool free_newp = true;
10293 pmap_pgtrace_page_state_t state;
10294
10295 // do all allocations outside of spinlocks
10296 newp = pmap_pgtrace_alloc_page();
10297
10298 // keep lock order: pmap, then kernel_pmap, then the pgtrace lock
10299 if (pmap != NULL) {
10300 PMAP_LOCK(pmap);
10301 }
10302 if (pmap != kernel_pmap) {
10303 PMAP_LOCK(kernel_pmap);
10304 }
10305
10306 // addresses are physical if pmap is null
10307 if (pmap == NULL) {
10308 ptep = NULL;
10309 pa_page = cur_page;
10310 state = VA_UNDEFINED;
10311 } else {
10312 ptep = pmap_pte(pmap, cur_page);
10313 if (ptep != NULL) {
10314 pa_page = pte_to_pa(*ptep);
10315 state = DEFINED;
10316 } else {
10317 state = PA_UNDEFINED;
10318 }
10319 }
10320
10321 // check whether we already have page info for this pa
10322 PMAP_PGTRACE_LOCK(&ints);
10323 if (state != PA_UNDEFINED) {
10324 p = pmap_pgtrace_find_page(pa_page);
10325 }
10326
10327 // add pre-allocated page info if nothing found
10328 if (p == NULL) {
10329 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
10330 p = newp;
10331 free_newp = false;
10332 }
10333
10334 // now p points to what we want
10335 p->state = state;
10336
10337 queue_head_t *mapq = &(p->maps);
10338 queue_head_t *mappool = &(p->map_pool);
10339 pmap_pgtrace_map_t *map;
10340 vm_map_offset_t start_offset, end_offset;
10341
10342 // calculate trace offsets in the page
10343 if (cur_page > start) {
10344 start_offset = 0;
10345 } else {
10346 start_offset = start-cur_page;
10347 }
10348 if (cur_page == end_page) {
10349 end_offset = end-end_page;
10350 } else {
10351 end_offset = ARM_PGBYTES-1;
10352 }
10353
10354 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
10355
10356 // fill map info
10357 assert(!queue_empty(mappool));
10358 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
10359 if (p->state == PA_UNDEFINED) {
10360 map->pmap = pmap;
10361 map->ova = cur_page;
10362 map->range.start = start_offset;
10363 map->range.end = end_offset;
10364 } else if (p->state == VA_UNDEFINED) {
10365 p->pa = pa_page;
10366 map->range.start = start_offset;
10367 map->range.end = end_offset;
10368 } else if (p->state == DEFINED) {
10369 p->pa = pa_page;
10370 map->pmap = pmap;
10371 map->ova = cur_page;
10372 map->range.start = start_offset;
10373 map->range.end = end_offset;
10374 } else {
10375 panic("invalid p->state=%d\n", p->state);
10376 }
10377
10378 // not cloned yet
10379 map->cloned = false;
10380 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
10381
10382 // drop the locks
10383 PMAP_PGTRACE_UNLOCK(&ints);
10384 if (pmap != kernel_pmap) {
10385 PMAP_UNLOCK(kernel_pmap);
10386 }
10387 if (pmap != NULL) {
10388 PMAP_UNLOCK(pmap);
10389 }
10390
10391 // now clone it
10392 if (pa_valid(pa_page)) {
10393 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
10394 }
10395 if (pmap == NULL) {
10396 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
10397 } else {
10398 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
10399 }
10400
10401 // free the pre-allocated page info if we didn't add it to the queue
10402 if (free_newp) {
10403 pmap_pgtrace_free_page(newp);
10404 }
10405
10406 if (num_cloned == 0) {
10407 kprintf("%s: no mapping found for pa_page=%llx, but it will be added when the page is entered\n", __func__, pa_page);
10408 }
10409
10410 ret += num_cloned;
10411
10412 // overflow
10413 if (cur_page + ARM_PGBYTES < cur_page) {
10414 break;
10415 } else {
10416 cur_page += ARM_PGBYTES;
10417 }
10418 }
10419
10420 PROF_END
10421
10422 return ret;
10423 }
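/*
 * Usage sketch (illustrative only; the kernel va below is a made-up value):
 *
 *   vm_map_offset_t va = 0xfffffff007004000ULL;
 *
 *   // start tracing one page worth of kernel addresses
 *   int added = pmap_pgtrace_add_page(kernel_pmap, va, va + ARM_PGBYTES);
 *
 *   // ... accesses to [va, va + ARM_PGBYTES) now fault into
 *   // pmap_pgtrace_fault() and get logged ...
 *
 *   // stop tracing and tear the clones down
 *   pmap_pgtrace_delete_page(kernel_pmap, va, va + ARM_PGBYTES);
 *
 * Passing a NULL pmap makes start/end be interpreted as physical addresses
 * (see the "addresses are physical if pmap is null" branch above).
 */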
10424
10425 // delete page infos for given address range
10426 int pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
10427 {
10428 int ret = 0;
10429 bool ints;
10430 queue_head_t *q = &(pmap_pgtrace.pages);
10431 pmap_pgtrace_page_t *p;
10432 vm_map_offset_t cur_page, end_page;
10433
10434 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
10435
10436 PROF_START
10437
10438 pt_entry_t *ptep;
10439 pmap_paddr_t pa_page;
10440
10441 // remove page info from start to end
10442 cur_page = arm_trunc_page(start);
10443 end_page = arm_trunc_page(end);
10444 while (cur_page <= end_page) {
10445 p = NULL;
10446
10447 if (pmap == NULL) {
10448 pa_page = cur_page;
10449 } else {
10450 PMAP_LOCK(pmap);
10451 ptep = pmap_pte(pmap, cur_page);
10452 if (ptep == NULL) {
10453 PMAP_UNLOCK(pmap);
10454 goto cont;
10455 }
10456 pa_page = pte_to_pa(*ptep);
10457 PMAP_UNLOCK(pmap);
10458 }
10459
10460 // remove all clones and validate
10461 pmap_pgtrace_remove_all_clone(pa_page);
10462
10463 // find page info and delete
10464 PMAP_PGTRACE_LOCK(&ints);
10465 p = pmap_pgtrace_find_page(pa_page);
10466 if (p != NULL) {
10467 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
10468 ret++;
10469 }
10470 PMAP_PGTRACE_UNLOCK(&ints);
10471
10472 // free outside of locks
10473 if (p != NULL) {
10474 pmap_pgtrace_free_page(p);
10475 }
10476
10477 cont:
10478 // overflow
10479 if (cur_page + ARM_PGBYTES < cur_page) {
10480 break;
10481 } else {
10482 cur_page += ARM_PGBYTES;
10483 }
10484 }
10485
10486 PROF_END
10487
10488 return ret;
10489 }
10490
10491 kern_return_t pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
10492 {
10493 pt_entry_t *ptep;
10494 pgtrace_run_result_t res;
10495 pmap_pgtrace_page_t *p;
10496 bool ints, found = false;
10497 pmap_paddr_t pa;
10498
10499 // Quick check if we are interested
10500 ptep = pmap_pte(pmap, va);
10501 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
10502 return KERN_FAILURE;
10503 }
10504
10505 PMAP_PGTRACE_LOCK(&ints);
10506
10507 // Check again since access is serialized
10508 ptep = pmap_pte(pmap, va);
10509 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
10510 PMAP_PGTRACE_UNLOCK(&ints);
10511 return KERN_FAILURE;
10512
10513 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
10514 // Somehow this CPU's TLB has not been updated yet
10515 kprintf("%s Somehow this CPU's TLB has not been updated yet\n", __func__);
10516 PMAP_UPDATE_TLBS(pmap, va, va+ARM_PGBYTES);
10517
10518 PMAP_PGTRACE_UNLOCK(&ints);
10519 return KERN_SUCCESS;
10520 }
10521
10522 // Find if this pa is what we are tracing
10523 pa = pte_to_pa(*ptep);
10524
10525 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
10526 if (p == NULL) {
10527 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
10528 }
10529
10530 // check whether the pmap and va also match
10531 queue_head_t *mapq = &(p->maps);
10532 queue_head_t *mapwaste = &(p->map_waste);
10533 pmap_pgtrace_map_t *map;
10534
10535 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10536 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
10537 found = true;
10538 break;
10539 }
10540 }
10541
10542 // if not found, search the map waste list, since those entries are still valid
10543 if (!found) {
10544 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
10545 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
10546 found = true;
10547 break;
10548 }
10549 }
10550 }
10551
10552 if (!found) {
10553 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
10554 }
10555
10556 // Decode and run it on the clone map
10557 bzero(&res, sizeof(res));
10558 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
10559 va, map->cva, // fault va and clone page vas
10560 ss, &res);
10561
10562 // write a log if in range
10563 vm_map_offset_t offset = va - map->ova;
10564 if (map->range.start <= offset && offset <= map->range.end) {
10565 pgtrace_write_log(res);
10566 }
10567
10568 PMAP_PGTRACE_UNLOCK(&ints);
10569
10570 // Advance the saved PC to the next instruction
10571 set_saved_state_pc(ss, get_saved_state_pc(ss) + sizeof(uint32_t));
10572
10573 return KERN_SUCCESS;
10574 }
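/*
 * Summary of the fault path above: the handler confirms the pte still
 * carries ARM_PTE_PGTRACE and is invalid, finds the traced page and its
 * matching map, lets pgtrace_decode_and_run() emulate the faulting
 * instruction against the clone pages in map->cva[], writes a log entry if
 * the access offset falls within map->range, and finally advances the saved
 * PC past the emulated instruction so execution continues normally.
 */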
10575 #endif
10576
10577 boolean_t
10578 pmap_enforces_execute_only(
10579 #if (__ARM_VMSA__ == 7)
10580 __unused
10581 #endif
10582 pmap_t pmap)
10583 {
10584 #if (__ARM_VMSA__ > 7)
10585 return (pmap != kernel_pmap);
10586 #else
10587 return FALSE;
10588 #endif
10589 }
10590
10591 void
10592 pmap_set_jit_entitled(
10593 __unused pmap_t pmap)
10594 {
10595 return;
10596 }
10597
10598 static kern_return_t
10599 pmap_query_page_info_internal(
10600 pmap_t pmap,
10601 vm_map_offset_t va,
10602 int *disp_p)
10603 {
10604 int disp;
10605 pmap_paddr_t pa;
10606 int pai;
10607 pt_entry_t *pte;
10608 pv_entry_t **pv_h, *pve_p;
10609
10610 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
10611 *disp_p = 0;
10612 return KERN_INVALID_ARGUMENT;
10613 }
10614
10615 disp = 0;
10616
10617 PMAP_LOCK(pmap);
10618
10619 pte = pmap_pte(pmap, va);
10620 if (pte == PT_ENTRY_NULL) {
10621 goto done;
10622 }
10623
10624 pa = pte_to_pa(*pte);
10625 if (pa == 0) {
10626 if (ARM_PTE_IS_COMPRESSED(*pte)) {
10627 disp |= PMAP_QUERY_PAGE_COMPRESSED;
10628 if (*pte & ARM_PTE_COMPRESSED_ALT) {
10629 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
10630 }
10631 }
10632 } else {
10633 disp |= PMAP_QUERY_PAGE_PRESENT;
10634 pai = (int) pa_index(pa);
10635 if (!pa_valid(pa)) {
10636 goto done;
10637 }
10638 LOCK_PVH(pai);
10639 pv_h = pai_to_pvh(pai);
10640 pve_p = PV_ENTRY_NULL;
10641 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
10642 pve_p = pvh_list(pv_h);
10643 while (pve_p != PV_ENTRY_NULL &&
10644 pve_get_ptep(pve_p) != pte) {
10645 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
10646 }
10647 }
10648 if (IS_ALTACCT_PAGE(pai, pve_p)) {
10649 disp |= PMAP_QUERY_PAGE_ALTACCT;
10650 } else if (IS_REUSABLE_PAGE(pai)) {
10651 disp |= PMAP_QUERY_PAGE_REUSABLE;
10652 } else if (IS_INTERNAL_PAGE(pai)) {
10653 disp |= PMAP_QUERY_PAGE_INTERNAL;
10654 }
10655 UNLOCK_PVH(pai);
10656 }
10657
10658 done:
10659 PMAP_UNLOCK(pmap);
10660 *disp_p = disp;
10661 return KERN_SUCCESS;
10662 }
10663
10664 kern_return_t
10665 pmap_query_page_info(
10666 pmap_t pmap,
10667 vm_map_offset_t va,
10668 int *disp_p)
10669 {
10670 return pmap_query_page_info_internal(pmap, va, disp_p);
10671 }
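/*
 * Caller-side sketch (illustrative; "upmap" and "uva" stand for any user
 * pmap and user address of interest):
 *
 *   int disp = 0;
 *   if (pmap_query_page_info(upmap, uva, &disp) == KERN_SUCCESS) {
 *       if (disp & PMAP_QUERY_PAGE_PRESENT) {
 *           // resident mapping; ALTACCT/REUSABLE/INTERNAL may also be set
 *       } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
 *           // backed by the compressor; COMPRESSED_ALTACCT may also be set
 *       }
 *   }
 */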
10672
10673 kern_return_t
10674 pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
10675 {
10676
10677 return KERN_SUCCESS;
10678 }
10679
10680 kern_return_t
10681 pmap_return(boolean_t do_panic, boolean_t do_recurse)
10682 {
10683 return pmap_return_internal(do_panic, do_recurse);
10684 }
10685
10686 static void
10687 pmap_footprint_suspend_internal(
10688 vm_map_t map,
10689 boolean_t suspend)
10690 {
10691 #if DEVELOPMENT || DEBUG
10692 if (suspend) {
10693 map->pmap->footprint_suspended = TRUE;
10694 map->pmap->footprint_was_suspended = TRUE;
10695 } else {
10696 map->pmap->footprint_suspended = FALSE;
10697 }
10698 #else /* DEVELOPMENT || DEBUG */
10699 (void) map;
10700 (void) suspend;
10701 #endif /* DEVELOPMENT || DEBUG */
10702 }
10703 void
10704 pmap_footprint_suspend(
10705 vm_map_t map,
10706 boolean_t suspend)
10707 {
10708 pmap_footprint_suspend_internal(map, suspend);
10709 }