]> git.saurik.com Git - apple/xnu.git/blob - osfmk/arm64/arm_vm_init.c
54a45f20290e381a999214d791fdeef53d21e4e0
[apple/xnu.git] / osfmk / arm64 / arm_vm_init.c
1 /*
2 * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach_debug.h>
30 #include <mach_kdp.h>
31 #include <debug.h>
32
33 #include <kern/assert.h>
34 #include <kern/misc_protos.h>
35 #include <kern/monotonic.h>
36 #include <mach/vm_types.h>
37 #include <mach/vm_param.h>
38 #include <vm/vm_kern.h>
39 #include <vm/vm_page.h>
40 #include <vm/pmap.h>
41
42 #include <machine/atomic.h>
43 #include <arm64/proc_reg.h>
44 #include <arm64/lowglobals.h>
45 #include <arm/cpu_data_internal.h>
46 #include <arm/misc_protos.h>
47 #include <pexpert/arm64/boot.h>
48 #include <pexpert/device_tree.h>
49
50 #include <libkern/kernel_mach_header.h>
51 #include <libkern/section_keywords.h>
52
53 #include <san/kasan.h>
54
55 #if __ARM_KERNEL_PROTECT__
56 /*
57 * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
58 * mappable space preceding the kernel (as we unmap the kernel by cutting the
59 * range covered by TTBR1 in half). This must also cover the exception vectors.
60 */
61 static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);
62
63 /* The exception vectors and the kernel cannot share root TTEs. */
64 static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);
65
66 /*
67 * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
68 * the exception vectors.
69 */
70 static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
71 #endif /* __ARM_KERNEL_PROTECT__ */
72
73 #if __APRR_SUPPORTED__ && XNU_MONITOR
74 /*
75 * If APRR is supported, setting XN on L1/L2 table entries will shift the effective
76 * APRR index of L3 PTEs covering PPL-protected pages in the kernel dynamic region
77 * from PPL R/W to kernel R/W. That will effectively remove PPL write protection
78 * from those pages. Avoid setting XN at the table level for MONITOR-enabled builds
79 * that are backed by APRR.
80 */
81 #define ARM_DYNAMIC_TABLE_XN ARM_TTE_TABLE_PXN
82 #else
83 #define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)
84 #endif
85
86 #if KASAN
87 extern vm_offset_t shadow_pbase;
88 extern vm_offset_t shadow_ptop;
89 extern vm_offset_t physmap_vbase;
90 extern vm_offset_t physmap_vtop;
91 #endif
92
93 /*
94 * We explicitly place this in const, as it is not const from a language
95 * perspective, but it is only modified before we actually switch away from
96 * the bootstrap page tables.
97 */
98 SECURITY_READ_ONLY_LATE(uint8_t) bootstrap_pagetables[BOOTSTRAP_TABLE_SIZE] __attribute__((aligned(ARM_PGBYTES)));
99
100 /*
101 * Denotes the end of xnu.
102 */
103 extern void *last_kernel_symbol;
104
105 extern void arm64_replace_bootstack(cpu_data_t*);
106 extern void PE_slide_devicetree(vm_offset_t);
107
108 /*
109 * KASLR parameters
110 */
111 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
112 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
113 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
114 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
115 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
116 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
117 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
118 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
119 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;
120
121 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
122 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
123 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
124 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
125 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
126 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
127 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
128 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;
129
130 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
131 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;
132
133 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_base;
134 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_top;
135
136 /* Used by <mach/arm/vm_param.h> */
137 SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
138 SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
139 SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
140 SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
141 SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;
142
143 /* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
144 * all kexts before the kernel. This is only for arm64 devices and looks
145 * something like the following:
146 * -- vmaddr order --
147 * 0xffffff8004004000 __PRELINK_TEXT
148 * 0xffffff8007004000 __TEXT (xnu)
149 * 0xffffff80075ec000 __DATA (xnu)
150 * 0xffffff80076dc000 __KLD (xnu)
151 * 0xffffff80076e0000 __LAST (xnu)
152 * 0xffffff80076e4000 __LINKEDIT (xnu)
153 * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
154 * 0xffffff800782c000 __PRELINK_INFO
155 * 0xffffff80078e4000 -- End of kernelcache
156 */
157
158 /* 24921709 - make XNU ready for KTRR
159 *
160 * Two possible kernel cache layouts, depending on which kcgen is being used.
161 * VAs increasing downwards.
162 * Old KCGEN:
163 *
164 * __PRELINK_TEXT
165 * __TEXT
166 * __DATA_CONST
167 * __TEXT_EXEC
168 * __KLD
169 * __LAST
170 * __DATA
171 * __PRELINK_DATA (expected empty)
172 * __LINKEDIT
173 * __PRELINK_INFO
174 *
175 * New kcgen:
176 *
177 * __PRELINK_TEXT <--- First KTRR (ReadOnly) segment
178 * __PLK_DATA_CONST
179 * __PLK_TEXT_EXEC
180 * __TEXT
181 * __DATA_CONST
182 * __TEXT_EXEC
183 * __KLD
184 * __LAST <--- Last KTRR (ReadOnly) segment
185 * __DATA
186 * __BOOTDATA (if present)
187 * __LINKEDIT
188 * __PRELINK_DATA (expected populated now)
189 * __PLK_LINKEDIT
190 * __PRELINK_INFO
191 *
192 */
193
194 vm_offset_t mem_size; /* Size of actual physical memory present
195 * minus any performance buffer and possibly
196 * limited by mem_limit in bytes */
197 uint64_t mem_actual; /* The "One True" physical memory size
198 * actually, it's the highest physical
199 * address + 1 */
200 uint64_t max_mem; /* Size of physical memory (bytes), adjusted
201 * by maxmem */
202 uint64_t max_mem_actual; /* Actual size of physical memory (bytes),
203 * adjusted by the maxmem boot-arg */
204 uint64_t sane_size; /* Memory size to use for defaults
205 * calculations */
206 /* This no longer appears to be used; kill it? */
207 addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
208 * virtual address known
209 * to the VM system */
210
211 SECURITY_READ_ONLY_LATE(vm_offset_t) segEXTRADATA;
212 SECURITY_READ_ONLY_LATE(unsigned long) segSizeEXTRADATA;
213
214 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTTEXT;
215 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWEST;
216 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRO;
217 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRO;
218
219 /* Only set when booted from MH_FILESET kernel collections */
220 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTKC;
221 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTKC;
222 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROKC;
223 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROKC;
224 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTAuxKC;
225 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTAuxKC;
226 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROAuxKC;
227 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROAuxKC;
228 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRXAuxKC;
229 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRXAuxKC;
230 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTNLEAuxKC;
231
232 SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
233 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;
234
235 #if XNU_MONITOR
236 SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTEXTB;
237 SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTEXT;
238
239 SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTRAMPB;
240 SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTRAMP;
241
242 SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATACONSTB;
243 SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATACONST;
244 SECURITY_READ_ONLY_LATE(void *) pmap_stacks_start = NULL;
245 SECURITY_READ_ONLY_LATE(void *) pmap_stacks_end = NULL;
246 #endif
247
248 SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
249 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;
250
251 SECURITY_READ_ONLY_LATE(vm_offset_t) segTEXTEXECB;
252 SECURITY_READ_ONLY_LATE(unsigned long) segSizeTEXTEXEC;
253
254 SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
255 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;
256
257 #if XNU_MONITOR
258 SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATAB;
259 SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATA;
260 #endif
261
262 SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB;
263 SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA;
264 extern vm_offset_t intstack_low_guard;
265 extern vm_offset_t intstack_high_guard;
266 extern vm_offset_t excepstack_high_guard;
267
268 SECURITY_READ_ONLY_LATE(vm_offset_t) segLINKB;
269 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;
270
271 SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
272 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLD;
273 SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB;
274 SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST;
275 SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTDATACONSTB;
276 SECURITY_READ_ONLY_LATE(unsigned long) segSizeLASTDATACONST;
277
278 SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBTEXTB;
279 SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBTEXT;
280 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIBDATAB;
281 SECURITY_READ_ONLY_LATE(unsigned long) segSizeHIBDATA;
282 SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBDATACONSTB;
283 SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBDATACONST;
284
285 SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB;
286 SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT;
287
288 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB;
289 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;
290
291 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB;
292 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;
293
294 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB;
295 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;
296
297 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0;
298 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;
299
300 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB;
301 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;
302
303 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB;
304 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;
305
306 /* Only set when booted from MH_FILESET primary kernel collection */
307 SECURITY_READ_ONLY_LATE(vm_offset_t) segKCTEXTEXECB;
308 SECURITY_READ_ONLY_LATE(unsigned long) segSizeKCTEXTEXEC;
309 SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATACONSTB;
310 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATACONST;
311 SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATAB;
312 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATA;
313
314 SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;
315
316 SECURITY_READ_ONLY_LATE(int) PAGE_SHIFT_CONST;
317
318 SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
319 SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
320 SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
321 SECURITY_READ_ONLY_LATE(vm_offset_t) edata;
322
323 SECURITY_READ_ONLY_LATE(static vm_offset_t) auxkc_mh, auxkc_base, auxkc_right_above;
324
325 vm_offset_t alloc_ptpage(boolean_t map_static);
326 SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
327
328 /*
329 * Bootstrap the system enough to run with virtual memory.
330 * Map the kernel's code and data, and allocate the system page table.
331 * Page_size must already be set.
332 *
333 * Parameters:
334 * first_avail: first available physical page -
335 * after kernel page tables
336 * avail_start: PA of first physical page
337 * avail_end: PA of last physical page
338 */
339 SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail;
340 SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end;
341 SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start;
342 SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end;
343 SECURITY_READ_ONLY_LATE(pmap_paddr_t) real_avail_end;
344 SECURITY_READ_ONLY_LATE(unsigned long) real_phys_size;
345 SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_base = (vm_map_address_t)0;
346 SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_end = (vm_map_address_t)0;
347
348 #if __ARM_KERNEL_PROTECT__
349 extern void ExceptionVectorsBase;
350 extern void ExceptionVectorsEnd;
351 #endif /* __ARM_KERNEL_PROTECT__ */
352
/*
 * One physical-to-virtual translation window: a physical range of len bytes
 * starting at pa that is addressed at kernel virtual address va.
 * phystokv(), phystokv_range() and ml_static_vtop() scan ptov_table in order
 * and stop at the first entry whose len is 0.
 */
353 typedef struct {
354 pmap_paddr_t pa; /* first physical address covered by the window */
355 vm_map_address_t va; /* kernel virtual address corresponding to pa */
356 vm_size_t len; /* window length in bytes; 0 ends the populated entries */
357 } ptov_table_entry;
358
359 #define PTOV_TABLE_SIZE 8 /* capacity of ptov_table */
360 SECURITY_READ_ONLY_LATE(static ptov_table_entry) ptov_table[PTOV_TABLE_SIZE];
/* arm_vm_page_granular_helper asserts this is FALSE before it replaces a block mapping with an L3 table. */
361 SECURITY_READ_ONLY_LATE(static boolean_t) kva_active = FALSE;
362
363
364 vm_map_address_t
365 phystokv(pmap_paddr_t pa)
366 {
367 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
368 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
369 return pa - ptov_table[i].pa + ptov_table[i].va;
370 }
371 }
372 assertf((pa - gPhysBase) < real_phys_size, "%s: illegal PA: 0x%llx", __func__, (uint64_t)pa);
373 return pa - gPhysBase + gVirtBase;
374 }
375
376 vm_map_address_t
377 phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
378 {
379 vm_size_t len;
380 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
381 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
382 len = ptov_table[i].len - (pa - ptov_table[i].pa);
383 if (*max_len > len) {
384 *max_len = len;
385 }
386 return pa - ptov_table[i].pa + ptov_table[i].va;
387 }
388 }
389 len = PAGE_SIZE - (pa & PAGE_MASK);
390 if (*max_len > len) {
391 *max_len = len;
392 }
393 assertf((pa - gPhysBase) < real_phys_size, "%s: illegal PA: 0x%llx", __func__, (uint64_t)pa);
394 return pa - gPhysBase + gVirtBase;
395 }
396
397 vm_offset_t
398 ml_static_vtop(vm_offset_t va)
399 {
400 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
401 if ((va >= ptov_table[i].va) && (va < (ptov_table[i].va + ptov_table[i].len))) {
402 return va - ptov_table[i].va + ptov_table[i].pa;
403 }
404 }
405 assertf(((vm_address_t)(va) - gVirtBase) < gPhysSize, "%s: illegal VA: %p", __func__, (void*)va);
406 return (vm_address_t)(va) - gVirtBase + gPhysBase;
407 }
408
409 /*
410 * This rounds the given address up to the nearest boundary for a PTE contiguous
411 * hint.
412 */
413 static vm_offset_t
414 round_up_pte_hint_address(vm_offset_t address)
415 {
416 vm_offset_t hint_size = ARM_PTE_SIZE << ARM_PTE_HINT_ENTRIES_SHIFT;
417 return (address + (hint_size - 1)) & ~(hint_size - 1);
418 }
419
420 /* allocate a page for a page table: we support static and dynamic mappings.
421 *
422 * returns a virtual address for the allocated page
423 *
424 * for static mappings, we allocate from the region ropagetable_begin to ro_pagetable_end-1,
425 * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
426 *
427 * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
428 */
429
430 vm_offset_t
431 alloc_ptpage(boolean_t map_static)
432 {
433 vm_offset_t vaddr;
434
435 #if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
436 map_static = FALSE;
437 #endif
438
439 if (!ropage_next) {
440 ropage_next = (vm_offset_t)&ropagetable_begin;
441 }
442
443 if (map_static) {
444 assert(ropage_next < (vm_offset_t)&ropagetable_end);
445
446 vaddr = ropage_next;
447 ropage_next += ARM_PGBYTES;
448
449 return vaddr;
450 } else {
451 vaddr = phystokv(avail_start);
452 avail_start += ARM_PGBYTES;
453
454 return vaddr;
455 }
456 }
457
458 #if DEBUG
459
460 void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out);
461
462 void
463 dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out)
464 {
465 unsigned int i;
466 boolean_t cur_ro, prev_ro = 0;
467 int start_entry = -1;
468 tt_entry_t cur, prev = 0;
469 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
470 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
471 boolean_t tt_static = kvtophys((vm_offset_t)tt) >= robegin &&
472 kvtophys((vm_offset_t)tt) < roend;
473
474 for (i = 0; i < TTE_PGENTRIES; i++) {
475 int tte_type = tt[i] & ARM_TTE_TYPE_MASK;
476 cur = tt[i] & ARM_TTE_TABLE_MASK;
477
478 if (tt_static) {
479 /* addresses mapped by this entry are static if it is a block mapping,
480 * or the table was allocated from the RO page table region */
481 cur_ro = (tte_type == ARM_TTE_TYPE_BLOCK) || (cur >= robegin && cur < roend);
482 } else {
483 cur_ro = 0;
484 }
485
486 if ((cur == 0 && prev != 0) || (cur_ro != prev_ro && prev != 0)) { // falling edge
487 uintptr_t start, end, sz;
488
489 start = (uintptr_t)start_entry << ARM_TT_L2_SHIFT;
490 start += tt_base;
491 end = ((uintptr_t)i << ARM_TT_L2_SHIFT) - 1;
492 end += tt_base;
493
494 sz = end - start + 1;
495 printf("%*s0x%08x_%08x-0x%08x_%08x %s (%luMB)\n",
496 indent * 4, "",
497 (uint32_t)(start >> 32), (uint32_t)start,
498 (uint32_t)(end >> 32), (uint32_t)end,
499 prev_ro ? "Static " : "Dynamic",
500 (sz >> 20));
501
502 if (prev_ro) {
503 *rosz_out += sz;
504 } else {
505 *rwsz_out += sz;
506 }
507 }
508
509 if ((prev == 0 && cur != 0) || cur_ro != prev_ro) { // rising edge: set start
510 start_entry = i;
511 }
512
513 prev = cur;
514 prev_ro = cur_ro;
515 }
516 }
517
518 void
519 dump_kva_space()
520 {
521 uint64_t tot_rosz = 0, tot_rwsz = 0;
522 int ro_ptpages, rw_ptpages;
523 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
524 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
525 boolean_t root_static = kvtophys((vm_offset_t)cpu_tte) >= robegin &&
526 kvtophys((vm_offset_t)cpu_tte) < roend;
527 uint64_t kva_base = ~((1ULL << (64 - T1SZ_BOOT)) - 1);
528
529 printf("Root page table: %s\n", root_static ? "Static" : "Dynamic");
530
531 for (unsigned int i = 0; i < TTE_PGENTRIES; i++) {
532 pmap_paddr_t cur;
533 boolean_t cur_ro;
534 uintptr_t start, end;
535 uint64_t rosz = 0, rwsz = 0;
536
537 if ((cpu_tte[i] & ARM_TTE_VALID) == 0) {
538 continue;
539 }
540
541 cur = cpu_tte[i] & ARM_TTE_TABLE_MASK;
542 start = (uint64_t)i << ARM_TT_L1_SHIFT;
543 start = start + kva_base;
544 end = start + (ARM_TT_L1_SIZE - 1);
545 cur_ro = cur >= robegin && cur < roend;
546
547 printf("0x%08x_%08x-0x%08x_%08x %s\n",
548 (uint32_t)(start >> 32), (uint32_t)start,
549 (uint32_t)(end >> 32), (uint32_t)end,
550 cur_ro ? "Static " : "Dynamic");
551
552 dump_kva_l2(start, (tt_entry_t*)phystokv(cur), 1, &rosz, &rwsz);
553 tot_rosz += rosz;
554 tot_rwsz += rwsz;
555 }
556
557 printf("L2 Address space mapped: Static %lluMB Dynamic %lluMB Total %lluMB\n",
558 tot_rosz >> 20,
559 tot_rwsz >> 20,
560 (tot_rosz >> 20) + (tot_rwsz >> 20));
561
562 ro_ptpages = (int)((ropage_next - (vm_offset_t)&ropagetable_begin) >> ARM_PGSHIFT);
563 rw_ptpages = (int)(lowGlo.lgStaticSize >> ARM_PGSHIFT);
564 printf("Pages used: static %d dynamic %d\n", ro_ptpages, rw_ptpages);
565 }
566
567 #endif /* DEBUG */
568
569 #if __ARM_KERNEL_PROTECT__ || XNU_MONITOR
570 /*
571 * arm_vm_map:
572 * root_ttp: The kernel virtual address for the root of the target page tables
573 * vaddr: The target virtual address
574 * pte: A page table entry value (may be ARM_PTE_EMPTY)
575 *
576 * This function installs pte at vaddr in root_ttp. Any page table pages needed
577 * to install pte will be allocated by this function.
578 */
579 static void
580 arm_vm_map(tt_entry_t * root_ttp, vm_offset_t vaddr, pt_entry_t pte)
581 {
582 vm_offset_t ptpage = 0;
583 tt_entry_t * ttp = root_ttp;
584
585 tt_entry_t * l1_ttep = NULL;
586 tt_entry_t l1_tte = 0;
587
588 tt_entry_t * l2_ttep = NULL;
589 tt_entry_t l2_tte = 0;
590 pt_entry_t * ptep = NULL;
591 pt_entry_t cpte = 0;
592
593 /*
594 * Walk the target page table to find the PTE for the given virtual
595 * address. Allocate any page table pages needed to do this.
596 */
597 l1_ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
598 l1_tte = *l1_ttep;
599
600 if (l1_tte == ARM_TTE_EMPTY) {
601 ptpage = alloc_ptpage(TRUE);
602 bzero((void *)ptpage, ARM_PGBYTES);
603 l1_tte = kvtophys(ptpage);
604 l1_tte &= ARM_TTE_TABLE_MASK;
605 l1_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
606 *l1_ttep = l1_tte;
607 ptpage = 0;
608 }
609
610 ttp = (tt_entry_t *)phystokv(l1_tte & ARM_TTE_TABLE_MASK);
611
612 l2_ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
613 l2_tte = *l2_ttep;
614
615 if (l2_tte == ARM_TTE_EMPTY) {
616 ptpage = alloc_ptpage(TRUE);
617 bzero((void *)ptpage, ARM_PGBYTES);
618 l2_tte = kvtophys(ptpage);
619 l2_tte &= ARM_TTE_TABLE_MASK;
620 l2_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
621 *l2_ttep = l2_tte;
622 ptpage = 0;
623 }
624
625 ttp = (tt_entry_t *)phystokv(l2_tte & ARM_TTE_TABLE_MASK);
626
627 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
628 cpte = *ptep;
629
630 /*
631 * If the existing PTE is not empty, then we are replacing a valid
632 * mapping.
633 */
634 if (cpte != ARM_PTE_EMPTY) {
635 panic("%s: cpte=%#llx is not empty, "
636 "vaddr=%#lx, pte=%#llx",
637 __FUNCTION__, cpte,
638 vaddr, pte);
639 }
640
641 *ptep = pte;
642 }
643
644 #endif // __ARM_KERNEL_PROTECT || XNU_MONITOR
645
646 #if __ARM_KERNEL_PROTECT__
647
648 /*
649 * arm_vm_kernel_el0_map:
650 * vaddr: The target virtual address
651 * pte: A page table entry value (may be ARM_PTE_EMPTY)
652 *
653 * This function installs pte at vaddr for the EL0 kernel mappings.
654 */
655 static void
656 arm_vm_kernel_el0_map(vm_offset_t vaddr, pt_entry_t pte)
657 {
658 /* Calculate where vaddr will be in the EL1 kernel page tables. */
659 vm_offset_t kernel_pmap_vaddr = vaddr - ((ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) / 2ULL);
660 arm_vm_map(cpu_tte, kernel_pmap_vaddr, pte);
661 }
662
663 /*
664 * arm_vm_kernel_el1_map:
665 * vaddr: The target virtual address
666 * pte: A page table entry value (may be ARM_PTE_EMPTY)
667 *
668 * This function installs pte at vaddr for the EL1 kernel mappings.
669 */
670 static void
671 arm_vm_kernel_el1_map(vm_offset_t vaddr, pt_entry_t pte)
672 {
673 arm_vm_map(cpu_tte, vaddr, pte);
674 }
675
676 /*
677 * arm_vm_kernel_pte:
678 * vaddr: The target virtual address
679 *
680 * This function returns the PTE value for the given vaddr from the kernel page
681 * tables. If the region has been been block mapped, we return what an
682 * equivalent PTE value would be (as regards permissions and flags). We also
683 * remove the HINT bit (as we are not necessarily creating contiguous mappings.
684 */
685 static pt_entry_t
686 arm_vm_kernel_pte(vm_offset_t vaddr)
687 {
688 tt_entry_t * ttp = cpu_tte;
689 tt_entry_t * ttep = NULL;
690 tt_entry_t tte = 0;
691 pt_entry_t * ptep = NULL;
692 pt_entry_t pte = 0;
693
694 ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
695 tte = *ttep;
696
697 assert(tte & ARM_TTE_VALID);
698
699 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
700 /* This is a block mapping; return the equivalent PTE value. */
701 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
702 pte |= ARM_PTE_TYPE_VALID;
703 pte |= vaddr & ((ARM_TT_L1_SIZE - 1) & ARM_PTE_PAGE_MASK);
704 pte &= ~ARM_PTE_HINT_MASK;
705 return pte;
706 }
707
708 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
709 ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
710 tte = *ttep;
711
712 assert(tte & ARM_TTE_VALID);
713
714 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
715 /* This is a block mapping; return the equivalent PTE value. */
716 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
717 pte |= ARM_PTE_TYPE_VALID;
718 pte |= vaddr & ((ARM_TT_L2_SIZE - 1) & ARM_PTE_PAGE_MASK);
719 pte &= ~ARM_PTE_HINT_MASK;
720 return pte;
721 }
722
723 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
724
725 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
726 pte = *ptep;
727 pte &= ~ARM_PTE_HINT_MASK;
728 return pte;
729 }
730
731 /*
732 * arm_vm_prepare_kernel_el0_mappings:
733 * alloc_only: Indicates if PTE values should be copied from the EL1 kernel
734 * mappings.
735 *
736 * This function expands the kernel page tables to support the EL0 kernel
737 * mappings, and conditionally installs the PTE values for the EL0 kernel
738 * mappings (if alloc_only is false).
739 */
740 static void
741 arm_vm_prepare_kernel_el0_mappings(bool alloc_only)
742 {
743 pt_entry_t pte = 0;
744 vm_offset_t start = ((vm_offset_t)&ExceptionVectorsBase) & ~PAGE_MASK;
745 vm_offset_t end = (((vm_offset_t)&ExceptionVectorsEnd) + PAGE_MASK) & ~PAGE_MASK;
746 vm_offset_t cur = 0;
747 vm_offset_t cur_fixed = 0;
748
749 /* Expand for/map the exceptions vectors in the EL0 kernel mappings. */
750 for (cur = start, cur_fixed = ARM_KERNEL_PROTECT_EXCEPTION_START; cur < end; cur += ARM_PGBYTES, cur_fixed += ARM_PGBYTES) {
751 /*
752 * We map the exception vectors at a different address than that
753 * of the kernelcache to avoid sharing page table pages with the
754 * kernelcache (as this may cause issues with TLB caching of
755 * page table pages.
756 */
757 if (!alloc_only) {
758 pte = arm_vm_kernel_pte(cur);
759 }
760
761 arm_vm_kernel_el1_map(cur_fixed, pte);
762 arm_vm_kernel_el0_map(cur_fixed, pte);
763 }
764
765 __builtin_arm_dmb(DMB_ISH);
766 __builtin_arm_isb(ISB_SY);
767
768 if (!alloc_only) {
769 /*
770 * If we have created the alternate exception vector mappings,
771 * the boot CPU may now switch over to them.
772 */
773 set_vbar_el1(ARM_KERNEL_PROTECT_EXCEPTION_START);
774 __builtin_arm_isb(ISB_SY);
775 }
776 }
777
778 /*
779 * arm_vm_populate_kernel_el0_mappings:
780 *
781 * This function adds all required mappings to the EL0 kernel mappings.
782 */
783 static void
784 arm_vm_populate_kernel_el0_mappings(void)
785 {
786 arm_vm_prepare_kernel_el0_mappings(FALSE);
787 }
788
789 /*
790 * arm_vm_expand_kernel_el0_mappings:
791 *
792 * This function expands the kernel page tables to accomodate the EL0 kernel
793 * mappings.
794 */
795 static void
796 arm_vm_expand_kernel_el0_mappings(void)
797 {
798 arm_vm_prepare_kernel_el0_mappings(TRUE);
799 }
800 #endif /* __ARM_KERNEL_PROTECT__ */
801
802 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
803 extern void bootstrap_instructions;
804
805 /*
806 * arm_replace_identity_map takes the V=P map that we construct in start.s
807 * and repurposes it in order to have it map only the page we need in order
808 * to turn on the MMU. This prevents us from running into issues where
809 * KTRR will cause us to fault on executable block mappings that cross the
810 * KTRR boundary.
811 */
812 static void
813 arm_replace_identity_map(void)
814 {
815 vm_offset_t addr;
816 pmap_paddr_t paddr;
817
818 pmap_paddr_t l1_ptp_phys = 0;
819 tt_entry_t *l1_ptp_virt = NULL;
820 tt_entry_t *tte1 = NULL;
821 pmap_paddr_t l2_ptp_phys = 0;
822 tt_entry_t *l2_ptp_virt = NULL;
823 tt_entry_t *tte2 = NULL;
824 pmap_paddr_t l3_ptp_phys = 0;
825 pt_entry_t *l3_ptp_virt = NULL;
826 pt_entry_t *ptep = NULL;
827
828 addr = ((vm_offset_t)&bootstrap_instructions) & ~ARM_PGMASK;
829 paddr = kvtophys(addr);
830
831 /*
832 * Grab references to the V=P page tables, and allocate an L3 page.
833 */
834 l1_ptp_phys = kvtophys((vm_offset_t)&bootstrap_pagetables);
835 l1_ptp_virt = (tt_entry_t *)phystokv(l1_ptp_phys);
836 tte1 = &l1_ptp_virt[L1_TABLE_INDEX(paddr)];
837
838 l2_ptp_virt = L2_TABLE_VA(tte1);
839 l2_ptp_phys = (*tte1) & ARM_TTE_TABLE_MASK;
840 tte2 = &l2_ptp_virt[L2_TABLE_INDEX(paddr)];
841
842 l3_ptp_virt = (pt_entry_t *)alloc_ptpage(TRUE);
843 l3_ptp_phys = kvtophys((vm_offset_t)l3_ptp_virt);
844 ptep = &l3_ptp_virt[L3_TABLE_INDEX(paddr)];
845
846 /*
847 * Replace the large V=P mapping with a mapping that provides only the
848 * mappings needed to turn on the MMU.
849 */
850
851 bzero(l1_ptp_virt, ARM_PGBYTES);
852 *tte1 = ARM_TTE_BOOT_TABLE | (l2_ptp_phys & ARM_TTE_TABLE_MASK);
853
854 bzero(l2_ptp_virt, ARM_PGBYTES);
855 *tte2 = ARM_TTE_BOOT_TABLE | (l3_ptp_phys & ARM_TTE_TABLE_MASK);
856
857 *ptep = (paddr & ARM_PTE_MASK) |
858 ARM_PTE_TYPE_VALID |
859 ARM_PTE_SH(SH_OUTER_MEMORY) |
860 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) |
861 ARM_PTE_AF |
862 ARM_PTE_AP(AP_RONA) |
863 ARM_PTE_NX;
864 }
865 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
866
867 tt_entry_t *arm_kva_to_tte(vm_offset_t);
868
869 tt_entry_t *
870 arm_kva_to_tte(vm_offset_t va)
871 {
872 tt_entry_t *tte1, *tte2;
873 tte1 = cpu_tte + L1_TABLE_INDEX(va);
874 tte2 = L2_TABLE_VA(tte1) + L2_TABLE_INDEX(va);
875
876 return tte2;
877 }
878
879 #if XNU_MONITOR
880
881 static inline pt_entry_t *
882 arm_kva_to_pte(vm_offset_t va)
883 {
884 tt_entry_t *tte2 = arm_kva_to_tte(va);
885 return L3_TABLE_VA(tte2) + L3_TABLE_INDEX(va);
886 }
887
888 #endif
889
890 #define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
891 #define ARM64_GRANULE_ALLOW_HINT (1 << 1)
892
893 /*
894 * arm_vm_page_granular_helper updates protections at the L3 level. It will (if
895 * necessary) allocate a page for the L3 table and update the corresponding L2
896 * entry. Then, it will iterate over the L3 table, updating protections as necessary.
897 * This expects to be invoked on a L2 entry or sub L2 entry granularity, so this should
898 * not be invoked from a context that does not do L2 iteration separately (basically,
899 * don't call this except from arm_vm_page_granular_prot).
900 *
901 * unsigned granule: 0 => force to page granule, or a combination of
902 * ARM64_GRANULE_* flags declared above.
903 */
904
905 static void
906 arm_vm_page_granular_helper(vm_offset_t start, vm_offset_t _end, vm_offset_t va, pmap_paddr_t pa_offset,
907 int pte_prot_APX, int pte_prot_XN, unsigned granule,
908 pt_entry_t **deferred_pte, pt_entry_t *deferred_ptmp)
909 {
910 if (va & ARM_TT_L2_OFFMASK) { /* ragged edge hanging over a ARM_TT_L2_SIZE boundary */
911 tt_entry_t *tte2;
912 tt_entry_t tmplate;
913 pmap_paddr_t pa;
914 pt_entry_t *ppte, *recursive_pte = NULL, ptmp, recursive_ptmp = 0;
915 addr64_t ppte_phys;
916 unsigned i;
917
918 va &= ~ARM_TT_L2_OFFMASK;
919 pa = va - gVirtBase + gPhysBase - pa_offset;
920
921 if (pa >= real_avail_end) {
922 return;
923 }
924
925 tte2 = arm_kva_to_tte(va);
926
927 assert(_end >= va);
928 tmplate = *tte2;
929
930 if (ARM_TTE_TYPE_TABLE == (tmplate & ARM_TTE_TYPE_MASK)) {
931 /* pick up the existing page table. */
932 ppte = (pt_entry_t *)phystokv((tmplate & ARM_TTE_TABLE_MASK));
933 } else {
934 // TTE must be reincarnated with page level mappings.
935
936 // ... but we don't want to break up blocks on live
937 // translation tables.
938 assert(!kva_active);
939
940 ppte = (pt_entry_t*)alloc_ptpage(pa_offset == 0);
941 bzero(ppte, ARM_PGBYTES);
942 ppte_phys = kvtophys((vm_offset_t)ppte);
943
944 *tte2 = pa_to_tte(ppte_phys) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
945 }
946
947 vm_offset_t len = _end - va;
948 if ((pa + len) > real_avail_end) {
949 _end -= (pa + len - real_avail_end);
950 }
951 assert((start - gVirtBase + gPhysBase - pa_offset) >= gPhysBase);
952
953 /* Round up to the nearest PAGE_SIZE boundary when creating mappings:
954 * PAGE_SIZE may be a multiple of ARM_PGBYTES, and we don't want to leave
955 * a ragged non-PAGE_SIZE-aligned edge. */
956 vm_offset_t rounded_end = round_page(_end);
957 /* Apply the desired protections to the specified page range */
958 for (i = 0; i <= (ARM_TT_L3_INDEX_MASK >> ARM_TT_L3_SHIFT); i++) {
959 if ((start <= va) && (va < rounded_end)) {
960 ptmp = pa | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE;
961 ptmp = ptmp | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
962 ptmp = ptmp | ARM_PTE_AP(pte_prot_APX);
963 ptmp = ptmp | ARM_PTE_NX;
964 #if __ARM_KERNEL_PROTECT__
965 ptmp = ptmp | ARM_PTE_NG;
966 #endif /* __ARM_KERNEL_PROTECT__ */
967
968 if (pte_prot_XN) {
969 ptmp = ptmp | ARM_PTE_PNX;
970 }
971
972 /*
973 * If we can, apply the contiguous hint to this range. The hint is
974 * applicable if the current address falls within a hint-sized range that will
975 * be fully covered by this mapping request.
976 */
977 if ((va >= round_up_pte_hint_address(start)) && (round_up_pte_hint_address(va + 1) <= _end) &&
978 (granule & ARM64_GRANULE_ALLOW_HINT) && use_contiguous_hint) {
979 assert((va & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1)) == ((pa & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1))));
980 ptmp |= ARM_PTE_HINT;
981 /* Do not attempt to reapply the hint bit to an already-active mapping.
982 * This very likely means we're attempting to change attributes on an already-active mapping,
983 * which violates the requirement of the hint bit.*/
984 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT));
985 }
986 /*
987 * Do not change the contiguous bit on an active mapping. Even in a single-threaded
988 * environment, it's possible for prefetch to produce a TLB conflict by trying to pull in
989 * a hint-sized entry on top of one or more existing page-sized entries. It's also useful
990 * to make sure we're not trying to unhint a sub-range of a larger hinted range, which
991 * could produce a later TLB conflict.
992 */
993 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT) || ((ppte[i] & ARM_PTE_HINT) == (ptmp & ARM_PTE_HINT)));
994
995 /*
996 * If we reach an entry that maps the current pte page, delay updating it until the very end.
997 * Otherwise we might end up making the PTE page read-only, leading to a fault later on in
998 * this function if we manage to outrun the TLB. This can happen on KTRR-enabled devices when
999 * marking segDATACONST read-only. Mappings for this region may straddle a PT page boundary,
1000 * so we must also defer assignment of the following PTE. We will assume that if the region
1001 * were to require one or more full L3 pages, it would instead use L2 blocks where possible,
1002 * therefore only requiring at most one L3 page at the beginning and one at the end.
1003 */
1004 if (kva_active && ((pt_entry_t*)(phystokv(pa)) == ppte)) {
1005 assert(recursive_pte == NULL);
1006 assert(granule & ARM64_GRANULE_ALLOW_BLOCK);
1007 recursive_pte = &ppte[i];
1008 recursive_ptmp = ptmp;
1009 } else if ((deferred_pte != NULL) && (&ppte[i] == &recursive_pte[1])) {
1010 assert(*deferred_pte == NULL);
1011 assert(deferred_ptmp != NULL);
1012 *deferred_pte = &ppte[i];
1013 *deferred_ptmp = ptmp;
1014 } else {
1015 ppte[i] = ptmp;
1016 }
1017 }
1018
1019 va += ARM_PGBYTES;
1020 pa += ARM_PGBYTES;
1021 }
1022 if (recursive_pte != NULL) {
1023 *recursive_pte = recursive_ptmp;
1024 }
1025 }
1026 }
1027
1028 /*
1029 * arm_vm_page_granular_prot updates protections by iterating over the L2 entries and
1030 * changing them. If a particular chunk necessitates L3 entries (for reasons of
1031 * alignment or length, or an explicit request that the entry be fully expanded), we
1032 * hand off to arm_vm_page_granular_helper to deal with the L3 chunk of the logic.
1033 */
1034 static void
1035 arm_vm_page_granular_prot(vm_offset_t start, unsigned long size, pmap_paddr_t pa_offset,
1036 int tte_prot_XN, int pte_prot_APX, int pte_prot_XN,
1037 unsigned granule)
1038 {
1039 pt_entry_t *deferred_pte = NULL, deferred_ptmp = 0;
1040 vm_offset_t _end = start + size;
1041 vm_offset_t align_start = (start + ARM_TT_L2_OFFMASK) & ~ARM_TT_L2_OFFMASK;
1042
1043 if (size == 0x0UL) {
1044 return;
1045 }
1046
1047 if (align_start > _end) {
1048 arm_vm_page_granular_helper(start, _end, start, pa_offset, pte_prot_APX, pte_prot_XN, granule, NULL, NULL);
1049 return;
1050 }
1051
1052 arm_vm_page_granular_helper(start, align_start, start, pa_offset, pte_prot_APX, pte_prot_XN, granule, &deferred_pte, &deferred_ptmp);
1053
1054 while ((_end - align_start) >= ARM_TT_L2_SIZE) {
1055 if (!(granule & ARM64_GRANULE_ALLOW_BLOCK)) {
1056 arm_vm_page_granular_helper(align_start, align_start + ARM_TT_L2_SIZE, align_start + 1, pa_offset,
1057 pte_prot_APX, pte_prot_XN, granule, NULL, NULL);
1058 } else {
1059 pmap_paddr_t pa = align_start - gVirtBase + gPhysBase - pa_offset;
1060 assert((pa & ARM_TT_L2_OFFMASK) == 0);
1061 tt_entry_t *tte2;
1062 tt_entry_t tmplate;
1063
1064 tte2 = arm_kva_to_tte(align_start);
1065
1066 if ((pa >= gPhysBase) && (pa < real_avail_end)) {
1067 tmplate = (pa & ARM_TTE_BLOCK_L2_MASK) | ARM_TTE_TYPE_BLOCK
1068 | ARM_TTE_VALID | ARM_TTE_BLOCK_AF | ARM_TTE_BLOCK_NX
1069 | ARM_TTE_BLOCK_AP(pte_prot_APX) | ARM_TTE_BLOCK_SH(SH_OUTER_MEMORY)
1070 | ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
1071
1072 #if __ARM_KERNEL_PROTECT__
1073 tmplate = tmplate | ARM_TTE_BLOCK_NG;
1074 #endif /* __ARM_KERNEL_PROTECT__ */
1075 if (tte_prot_XN) {
1076 tmplate = tmplate | ARM_TTE_BLOCK_PNX;
1077 }
1078
1079 *tte2 = tmplate;
1080 }
1081 }
1082 align_start += ARM_TT_L2_SIZE;
1083 }
1084
1085 if (align_start < _end) {
1086 arm_vm_page_granular_helper(align_start, _end, _end, pa_offset, pte_prot_APX, pte_prot_XN, granule, &deferred_pte, &deferred_ptmp);
1087 }
1088
1089 if (deferred_pte != NULL) {
1090 *deferred_pte = deferred_ptmp;
1091 }
1092 }
1093
1094 static inline void
1095 arm_vm_page_granular_RNX(vm_offset_t start, unsigned long size, unsigned granule)
1096 {
1097 arm_vm_page_granular_prot(start, size, 0, 1, AP_RONA, 1, granule);
1098 }
1099
1100 static inline void
1101 arm_vm_page_granular_ROX(vm_offset_t start, unsigned long size, unsigned granule)
1102 {
1103 arm_vm_page_granular_prot(start, size, 0, 0, AP_RONA, 0, granule);
1104 }
1105
1106 static inline void
1107 arm_vm_page_granular_RWNX(vm_offset_t start, unsigned long size, unsigned granule)
1108 {
1109 arm_vm_page_granular_prot(start, size, 0, 1, AP_RWNA, 1, granule);
1110 }
1111
1112 /* used in the chosen/memory-map node, populated by iBoot. */
1113 typedef struct MemoryMapFileInfo {
1114 vm_offset_t paddr;
1115 size_t length;
1116 } MemoryMapFileInfo;
1117
1118 // Populate seg...AuxKC and fixup AuxKC permissions
1119 static bool
1120 arm_vm_auxkc_init(void)
1121 {
1122 if (auxkc_mh == 0 || auxkc_base == 0) {
1123 return false; // no auxKC.
1124 }
1125
1126 /* Fixup AuxKC and populate seg*AuxKC globals used below */
1127 arm_auxkc_init((void*)auxkc_mh, (void*)auxkc_base);
1128
1129 if (segLOWESTAuxKC != segLOWEST) {
1130 panic("segLOWESTAuxKC (%p) not equal to segLOWEST (%p). auxkc_mh: %p, auxkc_base: %p",
1131 (void*)segLOWESTAuxKC, (void*)segLOWEST,
1132 (void*)auxkc_mh, (void*)auxkc_base);
1133 }
1134
1135 /*
1136 * The AuxKC LINKEDIT segment needs to be covered by the RO region but is excluded
1137 * from the RO address range returned by kernel_collection_adjust_mh_addrs().
1138 * Ensure the highest non-LINKEDIT address in the AuxKC is the current end of
1139 * its RO region before extending it.
1140 */
1141 assert(segHIGHESTROAuxKC == segHIGHESTNLEAuxKC);
1142 assert(segHIGHESTAuxKC >= segHIGHESTROAuxKC);
1143 if (segHIGHESTAuxKC > segHIGHESTROAuxKC) {
1144 segHIGHESTROAuxKC = segHIGHESTAuxKC;
1145 }
1146
1147 /*
1148 * The AuxKC RO region must be right below the device tree/trustcache so that it can be covered
1149 * by CTRR, and the AuxKC RX region must be within the RO region.
1150 */
1151 assert(segHIGHESTROAuxKC == auxkc_right_above);
1152 assert(segHIGHESTRXAuxKC <= segHIGHESTROAuxKC);
1153 assert(segLOWESTRXAuxKC <= segHIGHESTRXAuxKC);
1154 assert(segLOWESTROAuxKC <= segLOWESTRXAuxKC);
1155 assert(segLOWESTAuxKC <= segLOWESTROAuxKC);
1156
1157 if (segHIGHESTRXAuxKC < segLOWEST) {
1158 arm_vm_page_granular_RNX(segHIGHESTRXAuxKC, segLOWEST - segHIGHESTRXAuxKC, 0);
1159 }
1160 if (segLOWESTRXAuxKC < segHIGHESTRXAuxKC) {
1161 arm_vm_page_granular_ROX(segLOWESTRXAuxKC, segHIGHESTRXAuxKC - segLOWESTRXAuxKC, 0); // Refined in OSKext::readPrelinkedExtensions
1162 }
1163 if (segLOWESTROAuxKC < segLOWESTRXAuxKC) {
1164 arm_vm_page_granular_RNX(segLOWESTROAuxKC, segLOWESTRXAuxKC - segLOWESTROAuxKC, 0);
1165 }
1166 if (segLOWESTAuxKC < segLOWESTROAuxKC) {
1167 arm_vm_page_granular_RWNX(segLOWESTAuxKC, segLOWESTROAuxKC - segLOWESTAuxKC, 0);
1168 }
1169
1170 return true;
1171 }
1172
1173 void
1174 arm_vm_prot_init(__unused boot_args * args)
1175 {
1176 segLOWESTTEXT = UINT64_MAX;
1177 if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) {
1178 segLOWESTTEXT = segPRELINKTEXTB;
1179 }
1180 assert(segSizeTEXT);
1181 if (segTEXTB < segLOWESTTEXT) {
1182 segLOWESTTEXT = segTEXTB;
1183 }
1184 assert(segLOWESTTEXT < UINT64_MAX);
1185
1186 segEXTRADATA = segLOWESTTEXT;
1187 segSizeEXTRADATA = 0;
1188
1189 segLOWEST = segLOWESTTEXT;
1190 segLOWESTRO = segLOWESTTEXT;
1191
1192 if (segLOWESTKC && segLOWESTKC < segLOWEST) {
1193 /*
1194 * kernel collections have segments below the kernel. In particular the collection mach header
1195 * is below PRELINK_TEXT and is not covered by any other segments already tracked.
1196 */
1197 arm_vm_page_granular_RNX(segLOWESTKC, segLOWEST - segLOWESTKC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1198 segLOWEST = segLOWESTKC;
1199 if (segLOWESTROKC && segLOWESTROKC < segLOWESTRO) {
1200 segLOWESTRO = segLOWESTROKC;
1201 }
1202 if (segHIGHESTROKC && segHIGHESTROKC > segHIGHESTRO) {
1203 segHIGHESTRO = segHIGHESTROKC;
1204 }
1205 }
1206
1207 DTEntry memory_map;
1208 MemoryMapFileInfo const *trustCacheRange;
1209 unsigned int trustCacheRangeSize;
1210 int err;
1211
1212 if (SecureDTIsLockedDown()) {
1213 segEXTRADATA = (vm_offset_t)PE_state.deviceTreeHead;
1214 segSizeEXTRADATA = PE_state.deviceTreeSize;
1215 }
1216
1217 err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
1218 assert(err == kSuccess);
1219
1220 err = SecureDTGetProperty(memory_map, "TrustCache", (void const **)&trustCacheRange, &trustCacheRangeSize);
1221 if (err == kSuccess) {
1222 assert(trustCacheRangeSize == sizeof(MemoryMapFileInfo));
1223
1224 if (segSizeEXTRADATA == 0) {
1225 segEXTRADATA = phystokv(trustCacheRange->paddr);
1226 segSizeEXTRADATA = trustCacheRange->length;
1227 } else {
1228 segSizeEXTRADATA += trustCacheRange->length;
1229 }
1230 }
1231
1232 if (segSizeEXTRADATA != 0) {
1233 if (segEXTRADATA <= segLOWEST) {
1234 segLOWEST = segEXTRADATA;
1235 if (segEXTRADATA <= segLOWESTRO) {
1236 segLOWESTRO = segEXTRADATA;
1237 }
1238 }
1239 #if !(DEBUG || DEVELOPMENT)
1240
1241
1242 else {
1243 panic("EXTRADATA is in an unexpected place: %#lx > %#lx", segEXTRADATA, segLOWEST);
1244 }
1245 #endif /* !(DEBUG || DEVELOPMENT) */
1246
1247 arm_vm_page_granular_RNX(segEXTRADATA, segSizeEXTRADATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1248 }
1249
1250 const MemoryMapFileInfo *auxKC_range, *auxKC_header_range;
1251 unsigned int auxKC_range_size, auxKC_header_range_size;
1252
1253 err = SecureDTGetProperty(memory_map, "AuxKC", (const void**)&auxKC_range,
1254 &auxKC_range_size);
1255 if (err != kSuccess) {
1256 goto noAuxKC;
1257 }
1258 assert(auxKC_range_size == sizeof(MemoryMapFileInfo));
1259 err = SecureDTGetProperty(memory_map, "AuxKC-mach_header",
1260 (const void**)&auxKC_header_range, &auxKC_header_range_size);
1261 if (err != kSuccess) {
1262 goto noAuxKC;
1263 }
1264 assert(auxKC_header_range_size == sizeof(MemoryMapFileInfo));
1265
1266 auxkc_mh = phystokv(auxKC_header_range->paddr);
1267 auxkc_base = phystokv(auxKC_range->paddr);
1268 if (!auxkc_mh || !auxkc_base) {
1269 goto noAuxKC;
1270 }
1271
1272 if (auxkc_base < segLOWEST) {
1273 auxkc_right_above = segLOWEST;
1274 segLOWEST = auxkc_base;
1275 } else {
1276 panic("auxkc_base (%p) not below segLOWEST (%p)", (void*)auxkc_base, (void*)segLOWEST);
1277 }
1278
1279 /* Map AuxKC RWNX initially so that arm_vm_auxkc_init can traverse
1280 * it and apply fixups (after we're off the bootstrap translation
1281 * tables).
1282 */
1283 arm_vm_page_granular_RWNX(auxkc_base, auxKC_range->length, 0);
1284
1285 noAuxKC:
1286 /* Map coalesced kext TEXT segment RWNX for now */
1287 arm_vm_page_granular_RWNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions
1288
1289 /* Map coalesced kext DATA_CONST segment RWNX (could be empty) */
1290 arm_vm_page_granular_RWNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions
1291
1292 /* Map coalesced kext TEXT_EXEC segment RX (could be empty) */
1293 arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Refined in OSKext::readPrelinkedExtensions
1294
1295 /* if new segments not present, set space between PRELINK_TEXT and xnu TEXT to RWNX
1296 * otherwise we no longer expect any space between the coalesced kext read only segments and xnu rosegments
1297 */
1298 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
1299 if (segSizePRELINKTEXT) {
1300 arm_vm_page_granular_RWNX(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT),
1301 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1302 }
1303 } else {
1304 /*
1305 * If we have the new segments, we should still protect the gap between kext
1306 * read-only pages and kernel read-only pages, in the event that this gap
1307 * exists.
1308 */
1309 if ((segPLKDATACONSTB + segSizePLKDATACONST) < segTEXTB) {
1310 arm_vm_page_granular_RWNX(segPLKDATACONSTB + segSizePLKDATACONST, segTEXTB - (segPLKDATACONSTB + segSizePLKDATACONST),
1311 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1312 }
1313 }
1314
1315 /*
1316 * Protection on kernel text is loose here to allow shenanigans early on. These
1317 * protections are tightened in arm_vm_prot_finalize(). This is necessary because
1318 * we currently patch LowResetVectorBase in cpu.c.
1319 *
1320 * TEXT segment contains mach headers and other non-executable data. This will become RONX later.
1321 */
1322 arm_vm_page_granular_RNX(segTEXTB, segSizeTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1323
1324 /* Can DATACONST start out and stay RNX?
1325 * NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
1326 * Make RNX in prot_finalize
1327 */
1328 arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1329
1330 arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1331
1332 #if XNU_MONITOR
1333 arm_vm_page_granular_ROX(segPPLTEXTB, segSizePPLTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1334 arm_vm_page_granular_ROX(segPPLTRAMPB, segSizePPLTRAMP, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1335 arm_vm_page_granular_RNX(segPPLDATACONSTB, segSizePPLDATACONST, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1336 #endif
1337
1338 /* DATA segment will remain RWNX */
1339 arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1340 #if XNU_MONITOR
1341 arm_vm_page_granular_RWNX(segPPLDATAB, segSizePPLDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1342 #endif
1343
1344 arm_vm_page_granular_RWNX(segHIBDATAB, segSizeHIBDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1345
1346 arm_vm_page_granular_RWNX(segBOOTDATAB, segSizeBOOTDATA, 0);
1347 arm_vm_page_granular_RNX((vm_offset_t)&intstack_low_guard, PAGE_MAX_SIZE, 0);
1348 arm_vm_page_granular_RNX((vm_offset_t)&intstack_high_guard, PAGE_MAX_SIZE, 0);
1349 arm_vm_page_granular_RNX((vm_offset_t)&excepstack_high_guard, PAGE_MAX_SIZE, 0);
1350
1351 arm_vm_page_granular_ROX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1352 arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1353 arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Coalesced kext LINKEDIT segment
1354 arm_vm_page_granular_ROX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK); // __LAST may be empty, but we cannot assume this
1355 if (segLASTDATACONSTB) {
1356 arm_vm_page_granular_RWNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // __LASTDATA_CONST may be empty, but we cannot assume this
1357 }
1358 arm_vm_page_granular_RWNX(segPRELINKDATAB, segSizePRELINKDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Prelink __DATA for kexts (RW data)
1359
1360 if (segSizePLKLLVMCOV > 0) {
1361 arm_vm_page_granular_RWNX(segPLKLLVMCOVB, segSizePLKLLVMCOV, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // LLVM code coverage data
1362 }
1363 arm_vm_page_granular_RWNX(segPRELINKINFOB, segSizePRELINKINFO, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* PreLinkInfoDictionary */
1364
1365 /* Record the bounds of the kernelcache. */
1366 vm_kernelcache_base = segLOWEST;
1367 vm_kernelcache_top = end_kern;
1368 }
1369
/*
 * Comparison callback handed to qsort().  It must return
 *   a negative value when a sorts before b,
 *   zero             when a and b compare equal,
 *   a positive value when a sorts after b.
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);

/* Declared here by hand: sorts the n elements of size es starting at a, ordered by cmp. */
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
1379
1380 static int
1381 cmp_ptov_entries(const void *a, const void *b)
1382 {
1383 const ptov_table_entry *entry_a = a;
1384 const ptov_table_entry *entry_b = b;
1385 // Sort in descending order of segment length
1386 if (entry_a->len < entry_b->len) {
1387 return 1;
1388 } else if (entry_a->len > entry_b->len) {
1389 return -1;
1390 } else {
1391 return 0;
1392 }
1393 }
1394
1395 SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;
1396
1397 #define ROUND_L1(addr) (((addr) + ARM_TT_L1_OFFMASK) & ~(ARM_TT_L1_OFFMASK))
1398 #define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))
1399
1400 static void
1401 arm_vm_physmap_slide(ptov_table_entry *temp_ptov_table, vm_map_address_t orig_va, vm_size_t len, int pte_prot_APX, unsigned granule)
1402 {
1403 pmap_paddr_t pa_offset;
1404
1405 assert(ptov_index < PTOV_TABLE_SIZE);
1406 assert((orig_va & ARM_PGMASK) == 0);
1407 temp_ptov_table[ptov_index].pa = orig_va - gVirtBase + gPhysBase;
1408 if (ptov_index == 0) {
1409 temp_ptov_table[ptov_index].va = physmap_base;
1410 } else {
1411 temp_ptov_table[ptov_index].va = temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len;
1412 }
1413 if (granule & ARM64_GRANULE_ALLOW_BLOCK) {
1414 vm_map_address_t orig_offset = temp_ptov_table[ptov_index].pa & ARM_TT_TWIG_OFFMASK;
1415 vm_map_address_t new_offset = temp_ptov_table[ptov_index].va & ARM_TT_TWIG_OFFMASK;
1416 if (new_offset < orig_offset) {
1417 temp_ptov_table[ptov_index].va += (orig_offset - new_offset);
1418 } else if (new_offset > orig_offset) {
1419 temp_ptov_table[ptov_index].va = ROUND_TWIG(temp_ptov_table[ptov_index].va) + orig_offset;
1420 }
1421 }
1422 assert((temp_ptov_table[ptov_index].va & ARM_PGMASK) == 0);
1423 temp_ptov_table[ptov_index].len = round_page(len);
1424 pa_offset = temp_ptov_table[ptov_index].va - orig_va;
1425 arm_vm_page_granular_prot(temp_ptov_table[ptov_index].va, temp_ptov_table[ptov_index].len, pa_offset, 1, pte_prot_APX, 1, granule);
1426 ++ptov_index;
1427 }
1428
1429 #if XNU_MONITOR
1430
1431 SECURITY_READ_ONLY_LATE(static boolean_t) keep_linkedit = FALSE;
1432
1433 static void
1434 arm_vm_physmap_init(boot_args *args)
1435 {
1436 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1437 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1438
1439 // This is memory that will either be handed back to the VM layer via ml_static_mfree(),
1440 // or will be available for general-purpose use. Physical aperture mappings for this memory
1441 // must be at page granularity, so that PPL ownership or cache attribute changes can be reflected
1442 // in the physical aperture mappings.
1443
1444 // Slid region between gPhysBase and beginning of protected text
1445 arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA, 0);
1446
1447 // kext bootstrap segment
1448 arm_vm_physmap_slide(temp_ptov_table, segKLDB, segSizeKLD, AP_RONA, 0);
1449
1450 // Early-boot data
1451 arm_vm_physmap_slide(temp_ptov_table, segBOOTDATAB, segSizeBOOTDATA, AP_RONA, 0);
1452
1453 #if KASAN_DYNAMIC_BLACKLIST
1454 /* KASAN's dynamic blacklist needs to query the LINKEDIT segment at runtime. As such, the
1455 * kext bootstrap code will not jettison LINKEDIT on kasan kernels, so don't bother to relocate it. */
1456 keep_linkedit = TRUE;
1457 #else
1458 PE_parse_boot_argn("keepsyms", &keep_linkedit, sizeof(keep_linkedit));
1459 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
1460 keep_linkedit = TRUE;
1461 }
1462 #endif
1463 if (!keep_linkedit) {
1464 // Kernel LINKEDIT
1465 arm_vm_physmap_slide(temp_ptov_table, segLINKB, segSizeLINK, AP_RWNA, 0);
1466
1467 // Prelinked kernel LINKEDIT
1468 arm_vm_physmap_slide(temp_ptov_table, segPLKLINKEDITB, segSizePLKLINKEDIT, AP_RWNA, 0);
1469 }
1470
1471 // Prelinked kernel plists
1472 arm_vm_physmap_slide(temp_ptov_table, segPRELINKINFOB, segSizePRELINKINFO, AP_RWNA, 0);
1473
1474 // Device tree (if not locked down), ramdisk, boot args
1475 arm_vm_physmap_slide(temp_ptov_table, end_kern, (args->topOfKernelData - gPhysBase + gVirtBase) - end_kern, AP_RWNA, 0);
1476 if (!SecureDTIsLockedDown()) {
1477 PE_slide_devicetree(temp_ptov_table[ptov_index - 1].va - end_kern);
1478 }
1479
1480 // Remainder of physical memory
1481 arm_vm_physmap_slide(temp_ptov_table, (args->topOfKernelData - gPhysBase + gVirtBase),
1482 real_avail_end - args->topOfKernelData, AP_RWNA, 0);
1483
1484 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= physmap_end);
1485
1486 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1487 // segments should be placed earliest in the table to optimize lookup performance.
1488 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1489
1490 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1491 }
1492
1493 #else
1494
1495 static void
1496 arm_vm_physmap_init(boot_args *args)
1497 {
1498 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1499 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1500
1501 // Will be handed back to VM layer through ml_static_mfree() in arm_vm_prot_finalize()
1502 arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA,
1503 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1504
1505 arm_vm_page_granular_RWNX(end_kern, phystokv(args->topOfKernelData) - end_kern,
1506 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* Device Tree (if not locked down), RAM Disk (if present), bootArgs */
1507
1508 arm_vm_physmap_slide(temp_ptov_table, (args->topOfKernelData - gPhysBase + gVirtBase),
1509 real_avail_end - args->topOfKernelData, AP_RWNA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // rest of physmem
1510
1511 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= physmap_end);
1512
1513 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1514 // segments should be placed earliest in the table to optimize lookup performance.
1515 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1516
1517 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1518 }
1519
1520 #endif // XNU_MONITOR
1521
1522 void
1523 arm_vm_prot_finalize(boot_args * args __unused)
1524 {
1525 /*
1526 * At this point, we are far enough along in the boot process that it will be
1527 * safe to free up all of the memory preceeding the kernel. It may in fact
1528 * be safe to do this earlier.
1529 *
1530 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
1531 * as usable.
1532 */
1533
1534 /*
1535 * if old style PRELINK segment exists, free memory before it, and after it before XNU text
1536 * otherwise we're dealing with a new style kernel cache, so we should just free the
1537 * memory before PRELINK_TEXT segment, since the rest of the KEXT read only data segments
1538 * should be immediately followed by XNU's TEXT segment
1539 */
1540
1541 ml_static_mfree(phystokv(gPhysBase), segLOWEST - gVirtBase);
1542
1543 /*
1544 * KTRR support means we will be mucking with these pages and trying to
1545 * protect them; we cannot free the pages to the VM if we do this.
1546 */
1547 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
1548 /* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
1549 ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
1550 }
1551
1552 /* tighten permissions on kext read only data and code */
1553 arm_vm_page_granular_RNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK);
1554 arm_vm_page_granular_RNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1555
1556 cpu_stack_alloc(&BootCpuData);
1557 arm64_replace_bootstack(&BootCpuData);
1558 ml_static_mfree(phystokv(segBOOTDATAB - gVirtBase + gPhysBase), segSizeBOOTDATA);
1559
1560 #if __ARM_KERNEL_PROTECT__
1561 arm_vm_populate_kernel_el0_mappings();
1562 #endif /* __ARM_KERNEL_PROTECT__ */
1563
1564 #if XNU_MONITOR
1565 for (vm_offset_t va = segKLDB; va < (segKLDB + segSizeKLD); va += ARM_PGBYTES) {
1566 pt_entry_t *pte = arm_kva_to_pte(va);
1567 *pte = ARM_PTE_EMPTY;
1568 }
1569 /* Clear the original stack mappings; these pages should be mapped through ptov_table. */
1570 for (vm_offset_t va = segBOOTDATAB; va < (segBOOTDATAB + segSizeBOOTDATA); va += ARM_PGBYTES) {
1571 pt_entry_t *pte = arm_kva_to_pte(va);
1572 *pte = ARM_PTE_EMPTY;
1573 }
1574 /* Clear the original PRELINKINFO mapping. This segment should be jettisoned during I/O Kit
1575 * initialization before we reach this point. */
1576 for (vm_offset_t va = segPRELINKINFOB; va < (segPRELINKINFOB + segSizePRELINKINFO); va += ARM_PGBYTES) {
1577 pt_entry_t *pte = arm_kva_to_pte(va);
1578 *pte = ARM_PTE_EMPTY;
1579 }
1580 if (!keep_linkedit) {
1581 for (vm_offset_t va = segLINKB; va < (segLINKB + segSizeLINK); va += ARM_PGBYTES) {
1582 pt_entry_t *pte = arm_kva_to_pte(va);
1583 *pte = ARM_PTE_EMPTY;
1584 }
1585 for (vm_offset_t va = segPLKLINKEDITB; va < (segPLKLINKEDITB + segSizePLKLINKEDIT); va += ARM_PGBYTES) {
1586 pt_entry_t *pte = arm_kva_to_pte(va);
1587 *pte = ARM_PTE_EMPTY;
1588 }
1589 }
1590 #endif /* XNU_MONITOR */
1591
1592 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1593 /*
1594 * __LAST,__pinst should no longer be executable.
1595 */
1596 arm_vm_page_granular_RNX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK);
1597
1598 /* __LASTDATA_CONST should no longer be writable. */
1599 if (segLASTDATACONSTB) {
1600 arm_vm_page_granular_RNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1601 }
1602
1603 /*
1604 * Must wait until all other region permissions are set before locking down DATA_CONST
1605 * as the kernel static page tables live in DATA_CONST on KTRR enabled systems
1606 * and will become immutable.
1607 */
1608 #endif
1609
1610 arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1611
1612 __builtin_arm_dsb(DSB_ISH);
1613 flush_mmu_tlb();
1614 }
1615
1616 #define TBI_USER 0x1
1617 #define TBI_KERNEL 0x2
1618
1619 /*
1620 * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of
1621 * address accesses. It can be enabled separately for TTBR0 (user) and
1622 * TTBR1 (kernel). We enable it by default for user only.
1623 */
1624 static void
1625 set_tbi(void)
1626 {
1627 #if !__ARM_KERNEL_PROTECT__
1628 uint64_t old_tcr, new_tcr;
1629
1630 old_tcr = new_tcr = get_tcr();
1631 new_tcr |= TCR_TBI0_TOPBYTE_IGNORED;
1632
1633 if (old_tcr != new_tcr) {
1634 set_tcr(new_tcr);
1635 sysreg_restore.tcr_el1 = new_tcr;
1636 }
1637 #endif /* !__ARM_KERNEL_PROTECT__ */
1638 }
1639
1640 /*
1641 * Initialize and enter blank (invalid) page tables in a L1 translation table for a given VA range.
1642 *
1643 * This is a helper function used to build up the initial page tables for the kernel translation table.
1644 * With KERNEL_INTEGRITY we keep at least the root level of the kernel page table immutable, thus the need
1645 * to preallocate before machine_lockdown any L1 entries necessary during the entire kernel runtime.
1646 *
1647 * For a given VA range, if necessary, allocate new L2 translation tables and install the table entries in
1648 * the appropriate L1 table indexes. called before the translation table is active
1649 *
1650 * parameters:
1651 *
1652 * tt: virtual address of L1 translation table to modify
1653 * start: beginning of VA range
1654 * end: end of VA range
1655 * static_map: whether to allocate the new translation table page from read only memory
1656 * table_attrs: attributes of new table entry in addition to VALID and TYPE_TABLE attributes
1657 *
1658 */
1659
1660 static void
1661 init_ptpages(tt_entry_t *tt, vm_map_address_t start, vm_map_address_t end, bool static_map, uint64_t table_attrs)
1662 {
1663 tt_entry_t *l1_tte;
1664 vm_offset_t ptpage_vaddr;
1665
1666 l1_tte = tt + ((start & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1667
1668 while (start < end) {
1669 if (*l1_tte == ARM_TTE_EMPTY) {
1670 /* Allocate a page and setup L1 Table TTE in L1 */
1671 ptpage_vaddr = alloc_ptpage(static_map);
1672 *l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | table_attrs;
1673 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1674 }
1675
1676 if ((start + ARM_TT_L1_SIZE) < start) {
1677 /* If this is the last L1 entry, it must cover the last mapping. */
1678 break;
1679 }
1680
1681 start += ARM_TT_L1_SIZE;
1682 l1_tte++;
1683 }
1684 }
1685
/*
 * The physical aperture base is slid by a random amount below
 * ARM64_PHYSMAP_SLIDE_RANGE (1 GB); the mask keeps the bits under that range.
 */
#define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30)
#define ARM64_PHYSMAP_SLIDE_MASK (ARM64_PHYSMAP_SLIDE_RANGE - 1)
1688
1689 void
1690 arm_vm_init(uint64_t memory_size, boot_args * args)
1691 {
1692 vm_map_address_t va_l1, va_l1_end;
1693 tt_entry_t *cpu_l1_tte;
1694 vm_map_address_t va_l2, va_l2_end;
1695 tt_entry_t *cpu_l2_tte;
1696 pmap_paddr_t boot_ttep;
1697 tt_entry_t *boot_tte;
1698 uint64_t mem_segments;
1699 vm_offset_t ptpage_vaddr;
1700 vm_map_address_t dynamic_memory_begin;
1701
1702 /*
1703 * Get the virtual and physical kernel-managed memory base from boot_args.
1704 */
1705 gVirtBase = args->virtBase;
1706 gPhysBase = args->physBase;
1707 #if KASAN
1708 real_phys_size = args->memSize + (shadow_ptop - shadow_pbase);
1709 #else
1710 real_phys_size = args->memSize;
1711 #endif
1712 /*
1713 * Ensure the physical region we specify for the VM to manage ends on a
1714 * software page boundary. Note that the software page size (PAGE_SIZE)
1715 * may be a multiple of the hardware page size specified in ARM_PGBYTES.
1716 * We must round the reported memory size down to the nearest PAGE_SIZE
1717 * boundary to ensure the VM does not try to manage a page it does not
1718 * completely own. The KASAN shadow region, if present, is managed entirely
1719 * in units of the hardware page size and should not need similar treatment.
1720 */
1721 gPhysSize = mem_size = ((gPhysBase + args->memSize) & ~PAGE_MASK) - gPhysBase;
1722
1723 mem_actual = args->memSizeActual ? args->memSizeActual : mem_size;
1724
1725 if ((memory_size != 0) && (mem_size > memory_size)) {
1726 mem_size = memory_size;
1727 max_mem_actual = memory_size;
1728 } else {
1729 max_mem_actual = mem_actual;
1730 }
1731 if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 2)) {
1732 panic("Unsupported memory configuration %lx\n", mem_size);
1733 }
1734
1735 #if defined(ARM_LARGE_MEMORY)
1736 unsigned long physmap_l1_entries = ((real_phys_size + ARM64_PHYSMAP_SLIDE_RANGE) >> ARM_TT_L1_SHIFT) + 1;
1737 physmap_base = VM_MIN_KERNEL_ADDRESS - (physmap_l1_entries << ARM_TT_L1_SHIFT);
1738 #else
1739 physmap_base = phystokv(args->topOfKernelData);
1740 #endif
1741
1742 // Slide the physical aperture to a random page-aligned location within the slide range
1743 uint64_t physmap_slide = early_random() & ARM64_PHYSMAP_SLIDE_MASK & ~((uint64_t)PAGE_MASK);
1744 assert(physmap_slide < ARM64_PHYSMAP_SLIDE_RANGE);
1745
1746 physmap_base += physmap_slide;
1747
1748 #if XNU_MONITOR
1749 physmap_base = ROUND_TWIG(physmap_base);
1750 #if defined(ARM_LARGE_MEMORY)
1751 static_memory_end = phystokv(args->topOfKernelData);
1752 #else
1753 static_memory_end = physmap_base + mem_size;
1754 #endif // ARM_LARGE_MEMORY
1755 physmap_end = physmap_base + real_phys_size;
1756 #else
1757 static_memory_end = physmap_base + mem_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE); // worst possible case for block alignment
1758 physmap_end = physmap_base + real_phys_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE);
1759 #endif
1760
1761 #if KASAN && !defined(ARM_LARGE_MEMORY)
1762 /* add the KASAN stolen memory to the physmap */
1763 dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
1764 #else
1765 dynamic_memory_begin = static_memory_end;
1766 #endif
1767 #if XNU_MONITOR
1768 pmap_stacks_start = (void*)dynamic_memory_begin;
1769 dynamic_memory_begin += PPL_STACK_REGION_SIZE;
1770 pmap_stacks_end = (void*)dynamic_memory_begin;
1771 #endif
1772 if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS) {
1773 panic("Unsupported memory configuration %lx\n", mem_size);
1774 }
1775
1776 boot_tte = (tt_entry_t *)&bootstrap_pagetables;
1777 boot_ttep = kvtophys((vm_offset_t)boot_tte);
1778
1779 #if DEVELOPMENT || DEBUG
1780 /* Sanity check - assert that BOOTSTRAP_TABLE_SIZE is sufficiently-large to
1781 * hold our bootstrap mappings for any possible slide */
1782 size_t bytes_mapped = dynamic_memory_begin - gVirtBase;
1783 size_t l1_entries = 1 + ((bytes_mapped + ARM_TT_L1_SIZE - 1) / ARM_TT_L1_SIZE);
1784 /* 1 L1 each for V=P and KVA, plus 1 page for each L2 */
1785 size_t pages_used = 2 * (l1_entries + 1);
1786 if (pages_used > BOOTSTRAP_TABLE_SIZE) {
1787 panic("BOOTSTRAP_TABLE_SIZE too small for memory config\n");
1788 }
1789 #endif
1790
1791 /*
1792 * TTBR0 L1, TTBR0 L2 - 1:1 bootstrap mapping.
1793 * TTBR1 L1, TTBR1 L2 - kernel mapping
1794 */
1795
1796 /*
1797 * TODO: free bootstrap table memory back to allocator.
1798 * on large memory systems bootstrap tables could be quite large.
1799 * after bootstrap complete, xnu can warm start with a single 16KB page mapping
1800 * to trampoline to KVA. this requires only 3 pages to stay resident.
1801 */
1802 avail_start = args->topOfKernelData;
1803
1804 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1805 arm_replace_identity_map();
1806 #endif
1807
1808 /* Initialize invalid tte page */
1809 invalid_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1810 invalid_ttep = kvtophys((vm_offset_t)invalid_tte);
1811 bzero(invalid_tte, ARM_PGBYTES);
1812
1813 /*
1814 * Initialize l1 page table page
1815 */
1816 cpu_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1817 cpu_ttep = kvtophys((vm_offset_t)cpu_tte);
1818 bzero(cpu_tte, ARM_PGBYTES);
1819 avail_end = gPhysBase + mem_size;
1820 assert(!(avail_end & PAGE_MASK));
1821
1822 #if KASAN
1823 real_avail_end = gPhysBase + real_phys_size;
1824 #else
1825 real_avail_end = avail_end;
1826 #endif
1827
1828 /*
1829 * Initialize l1 and l2 page table pages :
1830 * map physical memory at the kernel base virtual address
1831 * cover the kernel dynamic address range section
1832 *
1833 * the so called physical aperture should be statically mapped
1834 */
1835 init_ptpages(cpu_tte, gVirtBase, dynamic_memory_begin, TRUE, 0);
1836
1837 #if defined(ARM_LARGE_MEMORY)
1838 /*
1839 * Initialize l1 page table pages :
1840 * on large memory systems the physical aperture exists separately below
1841 * the rest of the kernel virtual address space
1842 */
1843 init_ptpages(cpu_tte, physmap_base, ROUND_L1(physmap_end), TRUE, ARM_DYNAMIC_TABLE_XN);
1844 #endif
1845
1846
1847 #if __ARM_KERNEL_PROTECT__
1848 /* Expand the page tables to prepare for the EL0 mappings. */
1849 arm_vm_expand_kernel_el0_mappings();
1850 #endif /* __ARM_KERNEL_PROTECT__ */
1851
1852 /*
1853 * Now retrieve addresses for various segments from kernel mach-o header
1854 */
1855 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
1856 segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
1857 segPLKTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
1858 segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
1859 segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
1860 segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
1861 #if XNU_MONITOR
1862 segPPLTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTEXT", &segSizePPLTEXT);
1863 segPPLTRAMPB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTRAMP", &segSizePPLTRAMP);
1864 segPPLDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA_CONST", &segSizePPLDATACONST);
1865 #endif
1866 segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
1867 #if XNU_MONITOR
1868 segPPLDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA", &segSizePPLDATA);
1869 #endif
1870
1871 segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
1872 segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
1873 segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
1874 segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
1875 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
1876 segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
1877 segPLKLINKEDITB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
1878 segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
1879 segLASTDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LASTDATA_CONST", &segSizeLASTDATACONST);
1880
1881 sectHIBTEXTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__TEXT_EXEC", "__hib_text", &sectSizeHIBTEXT);
1882 sectHIBDATACONSTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__DATA_CONST", "__hib_const", &sectSizeHIBDATACONST);
1883 segHIBDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__HIBDATA", &segSizeHIBDATA);
1884
1885 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
1886 kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);
1887
1888 // fileset has kext PLK_TEXT_EXEC under kernel collection TEXT_EXEC following kernel's LAST
1889 segKCTEXTEXECB = (vm_offset_t) getsegdatafromheader(kc_mh, "__TEXT_EXEC", &segSizeKCTEXTEXEC);
1890 assert(segPLKTEXTEXECB && !segSizePLKTEXTEXEC); // kernel PLK_TEXT_EXEC must be empty
1891 assert(segLASTB && segSizeLAST); // kernel LAST must not be empty
1892 assert(segKCTEXTEXECB <= segLASTB); // KC TEXT_EXEC must contain kernel LAST
1893 assert(segKCTEXTEXECB + segSizeKCTEXTEXEC >= segLASTB + segSizeLAST);
1894 segPLKTEXTEXECB = segLASTB + segSizeLAST;
1895 segSizePLKTEXTEXEC = segSizeKCTEXTEXEC - (segPLKTEXTEXECB - segKCTEXTEXECB);
1896
1897 // fileset has kext PLK_DATA_CONST under kernel collection DATA_CONST following kernel's LASTDATA_CONST
1898 segKCDATACONSTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA_CONST", &segSizeKCDATACONST);
1899 assert(segPLKDATACONSTB && !segSizePLKDATACONST); // kernel PLK_DATA_CONST must be empty
1900 assert(segLASTDATACONSTB && segSizeLASTDATACONST); // kernel LASTDATA_CONST must be non-empty
1901 assert(segKCDATACONSTB <= segLASTDATACONSTB); // KC DATA_CONST must contain kernel LASTDATA_CONST
1902 assert(segKCDATACONSTB + segSizeKCDATACONST >= segLASTDATACONSTB + segSizeLASTDATACONST);
1903 segPLKDATACONSTB = segLASTDATACONSTB + segSizeLASTDATACONST;
1904 segSizePLKDATACONST = segSizeKCDATACONST - (segPLKDATACONSTB - segKCDATACONSTB);
1905
1906 // fileset has kext PRELINK_DATA under kernel collection DATA following kernel's empty PRELINK_DATA
1907 segKCDATAB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA", &segSizeKCDATA);
1908 assert(segPRELINKDATAB && !segSizePRELINKDATA); // kernel PRELINK_DATA must be empty
1909 assert(segKCDATAB <= segPRELINKDATAB); // KC DATA must contain kernel PRELINK_DATA
1910 assert(segKCDATAB + segSizeKCDATA >= segPRELINKDATAB + segSizePRELINKDATA);
1911 segSizePRELINKDATA = segSizeKCDATA - (segPRELINKDATAB - segKCDATAB);
1912
1913 // fileset has consolidated PRELINK_TEXT, PRELINK_INFO and LINKEDIT at the kernel collection level
1914 assert(segPRELINKTEXTB && !segSizePRELINKTEXT); // kernel PRELINK_TEXT must be empty
1915 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_TEXT", &segSizePRELINKTEXT);
1916 assert(segPRELINKINFOB && !segSizePRELINKINFO); // kernel PRELINK_INFO must be empty
1917 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_INFO", &segSizePRELINKINFO);
1918 segLINKB = (vm_offset_t) getsegdatafromheader(kc_mh, "__LINKEDIT", &segSizeLINK);
1919 }
1920
1921 (void) PE_parse_boot_argn("use_contiguous_hint", &use_contiguous_hint, sizeof(use_contiguous_hint));
1922 assert(segSizePRELINKTEXT < 0x03000000); /* 23355738 */
1923
1924 /* if one of the new segments is present, the other one better be as well */
1925 if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
1926 assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
1927 }
1928
1929 etext = (vm_offset_t) segTEXTB + segSizeTEXT;
1930 sdata = (vm_offset_t) segDATAB;
1931 edata = (vm_offset_t) segDATAB + segSizeDATA;
1932 end_kern = round_page(segHIGHESTKC ? segHIGHESTKC : getlastaddr()); /* Force end to next page */
1933
1934 vm_set_page_size();
1935
1936 vm_kernel_base = segTEXTB;
1937 vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
1938 vm_kext_base = segPRELINKTEXTB;
1939 vm_kext_top = vm_kext_base + segSizePRELINKTEXT;
1940
1941 vm_prelink_stext = segPRELINKTEXTB;
1942 if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
1943 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
1944 } else {
1945 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
1946 }
1947 vm_prelink_sinfo = segPRELINKINFOB;
1948 vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
1949 vm_slinkedit = segLINKB;
1950 vm_elinkedit = segLINKB + segSizeLINK;
1951
1952 vm_prelink_sdata = segPRELINKDATAB;
1953 vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;
1954
1955 arm_vm_prot_init(args);
1956
1957 vm_page_kernelcache_count = (unsigned int) (atop_64(end_kern - segLOWEST));
1958
1959 /*
1960 * Initialize the page tables for the low globals:
1961 * cover this address range:
1962 * LOW_GLOBAL_BASE_ADDRESS + 2MB
1963 */
1964 va_l1 = va_l2 = LOW_GLOBAL_BASE_ADDRESS;
1965 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1966 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1967 ptpage_vaddr = alloc_ptpage(TRUE);
1968 *cpu_l2_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
1969 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1970
1971 /*
1972 * Initialize l2 page table pages :
1973 * cover this address range:
1974 * KERNEL_DYNAMIC_ADDR - VM_MAX_KERNEL_ADDRESS
1975 */
1976 #if defined(ARM_LARGE_MEMORY)
1977 /*
1978 * dynamic mapped memory outside the VM allocator VA range required to bootstrap VM system
1979 * don't expect to exceed 64GB, no sense mapping any more space between here and the VM heap range
1980 */
1981 init_ptpages(cpu_tte, dynamic_memory_begin, ROUND_L1(dynamic_memory_begin), FALSE, ARM_DYNAMIC_TABLE_XN);
1982 #else
1983 /*
1984 * TODO: do these pages really need to come from RO memory?
1985 * With legacy 3 level table systems we never mapped more than a single L1 entry so this may be dead code
1986 */
1987 init_ptpages(cpu_tte, dynamic_memory_begin, VM_MAX_KERNEL_ADDRESS, TRUE, ARM_DYNAMIC_TABLE_XN);
1988 #endif
1989
1990 #if KASAN
1991 /* record the extent of the physmap */
1992 physmap_vbase = physmap_base;
1993 physmap_vtop = physmap_end;
1994 kasan_init();
1995 #endif /* KASAN */
1996
1997 #if MONOTONIC
1998 mt_early_init();
1999 #endif /* MONOTONIC */
2000
2001 set_tbi();
2002
2003 arm_vm_physmap_init(args);
2004 set_mmu_ttb_alternate(cpu_ttep & TTBR_BADDR_MASK);
2005
2006
2007 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
2008
2009 flush_mmu_tlb();
2010 #if defined(HAS_VMSA_LOCK)
2011 vmsa_lock();
2012 #endif
2013 kva_active = TRUE;
2014 // global table pointers may need to be different due to physical aperture remapping
2015 cpu_tte = (tt_entry_t*)(phystokv(cpu_ttep));
2016 invalid_tte = (tt_entry_t*)(phystokv(invalid_ttep));
2017
2018 // From here on out, we're off the bootstrap translation tables.
2019
2020
2021 /* AuxKC initialization has to be deferred until this point, since
2022 * the AuxKC may not have been fully mapped in the bootstrap
2023 * tables, if it spilled downwards into the prior L2 block.
2024 *
2025 * Now that its mapping set up by arm_vm_prot_init() is active,
2026 * we can traverse and fix it up.
2027 */
2028
2029 if (arm_vm_auxkc_init()) {
2030 if (segLOWESTROAuxKC < segLOWESTRO) {
2031 segLOWESTRO = segLOWESTROAuxKC;
2032 }
2033 if (segHIGHESTROAuxKC > segHIGHESTRO) {
2034 segHIGHESTRO = segHIGHESTROAuxKC;
2035 }
2036 if (segLOWESTRXAuxKC < segLOWESTTEXT) {
2037 segLOWESTTEXT = segLOWESTRXAuxKC;
2038 }
2039 assert(segLOWEST == segLOWESTAuxKC);
2040
2041 // The preliminary auxKC mapping has been broken up.
2042 flush_mmu_tlb();
2043 }
2044
2045 sane_size = mem_size - (avail_start - gPhysBase);
2046 max_mem = mem_size;
2047 vm_kernel_slid_base = segLOWESTTEXT;
2048 vm_kernel_slid_top = vm_prelink_einfo;
2049 // vm_kernel_slide is set by arm_init()->arm_slide_rebase_and_sign_image()
2050 vm_kernel_stext = segTEXTB;
2051
2052 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
2053 // fileset has kext TEXT before kernel DATA_CONST
2054 assert(segTEXTEXECB == segTEXTB + segSizeTEXT);
2055 vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeTEXTEXEC;
2056 } else {
2057 assert(segDATACONSTB == segTEXTB + segSizeTEXT);
2058 assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
2059 vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
2060 }
2061
2062 dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);
2063 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
2064 // reserve a 32MB region without permission overrides to use later for a CTRR unit test
2065 {
2066 extern vm_offset_t ctrr_test_page;
2067 tt_entry_t *new_tte;
2068
2069 ctrr_test_page = dynamic_memory_begin;
2070 dynamic_memory_begin += ARM_TT_L2_SIZE;
2071 cpu_l1_tte = cpu_tte + ((ctrr_test_page & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2072 assert((*cpu_l1_tte) & ARM_TTE_VALID);
2073 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((ctrr_test_page & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2074 assert((*cpu_l2_tte) == ARM_TTE_EMPTY);
2075 new_tte = (tt_entry_t *)alloc_ptpage(FALSE);
2076 bzero(new_tte, ARM_PGBYTES);
2077 *cpu_l2_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
2078 }
2079 #endif /* defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST) */
2080 #if XNU_MONITOR
2081 for (vm_offset_t cur = (vm_offset_t)pmap_stacks_start; cur < (vm_offset_t)pmap_stacks_end; cur += ARM_PGBYTES) {
2082 arm_vm_map(cpu_tte, cur, ARM_PTE_EMPTY);
2083 }
2084 #endif
2085 pmap_bootstrap(dynamic_memory_begin);
2086
2087 disable_preemption();
2088
2089 /*
2090 * Initialize l3 page table pages :
2091 * cover this address range:
2092 * 2MB + FrameBuffer size + 10MB for each 256MB segment
2093 */
2094
2095 mem_segments = (mem_size + 0x0FFFFFFF) >> 28;
2096
2097 va_l1 = dynamic_memory_begin;
2098 va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
2099 va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
2100 va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
2101
2102 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2103
2104 while (va_l1 < va_l1_end) {
2105 va_l2 = va_l1;
2106
2107 if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
2108 /* If this is the last L1 entry, it must cover the last mapping. */
2109 va_l2_end = va_l1_end;
2110 } else {
2111 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
2112 }
2113
2114 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2115
2116 while (va_l2 < va_l2_end) {
2117 pt_entry_t * ptp;
2118 pmap_paddr_t ptp_phys;
2119
2120 /* Allocate a page and setup L3 Table TTE in L2 */
2121 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
2122 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
2123
2124 bzero(ptp, ARM_PGBYTES);
2125 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
2126
2127 *cpu_l2_tte = (pa_to_tte(ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
2128
2129 va_l2 += ARM_TT_L2_SIZE;
2130 cpu_l2_tte++;
2131 }
2132
2133 va_l1 = va_l2_end;
2134 cpu_l1_tte++;
2135 }
2136
2137 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2138 /*
2139 * In this configuration, the bootstrap mappings (arm_vm_init) and
2140 * the heap mappings occupy separate L1 regions. Explicitly set up
2141 * the heap L1 allocations here.
2142 */
2143 #if defined(ARM_LARGE_MEMORY)
2144 init_ptpages(cpu_tte, KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN);
2145 #else // defined(ARM_LARGE_MEMORY)
2146 va_l1 = VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK;
2147 init_ptpages(cpu_tte, VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN);
2148 #endif // defined(ARM_LARGE_MEMORY)
2149 #endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2150
2151 /*
2152 * Initialize l3 page table pages :
2153 * cover this address range:
2154 * ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA) to VM_MAX_KERNEL_ADDRESS
2155 */
2156 va_l1 = (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA;
2157 va_l1_end = VM_MAX_KERNEL_ADDRESS;
2158
2159 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2160
2161 while (va_l1 < va_l1_end) {
2162 va_l2 = va_l1;
2163
2164 if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
2165 /* If this is the last L1 entry, it must cover the last mapping. */
2166 va_l2_end = va_l1_end;
2167 } else {
2168 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
2169 }
2170
2171 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2172
2173 while (va_l2 < va_l2_end) {
2174 pt_entry_t * ptp;
2175 pmap_paddr_t ptp_phys;
2176
2177 /* Allocate a page and setup L3 Table TTE in L2 */
2178 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
2179 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
2180
2181 bzero(ptp, ARM_PGBYTES);
2182 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
2183
2184 *cpu_l2_tte = (pa_to_tte(ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
2185
2186 va_l2 += ARM_TT_L2_SIZE;
2187 cpu_l2_tte++;
2188 }
2189
2190 va_l1 = va_l2_end;
2191 cpu_l1_tte++;
2192 }
2193
2194
2195 /*
2196 * Adjust avail_start so that the range that the VM owns
2197 * starts on a PAGE_SIZE aligned boundary.
2198 */
2199 avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;
2200
2201 #if XNU_MONITOR
2202 pmap_static_allocations_done();
2203 #endif
2204 first_avail = avail_start;
2205 patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData);
2206 enable_preemption();
2207 }