1 /*
2 * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach_debug.h>
30 #include <mach_kdp.h>
31 #include <debug.h>
32
33 #include <kern/assert.h>
34 #include <kern/misc_protos.h>
35 #include <kern/monotonic.h>
36 #include <mach/vm_types.h>
37 #include <mach/vm_param.h>
38 #include <vm/vm_kern.h>
39 #include <vm/vm_page.h>
40 #include <vm/pmap.h>
41
42 #include <machine/atomic.h>
43 #include <arm64/proc_reg.h>
44 #include <arm64/lowglobals.h>
45 #include <arm/cpu_data_internal.h>
46 #include <arm/misc_protos.h>
47 #include <pexpert/arm64/boot.h>
48 #include <pexpert/device_tree.h>
49
50 #include <libkern/kernel_mach_header.h>
51 #include <libkern/section_keywords.h>
52
53 #include <san/kasan.h>
54
55 #if __ARM_KERNEL_PROTECT__
56 /*
57 * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
58 * mappable space preceding the kernel (as we unmap the kernel by cutting the
59 * range covered by TTBR1 in half). This must also cover the exception vectors.
60 */
61 static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);
62
63 /* The exception vectors and the kernel cannot share root TTEs. */
64 static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);
65
66 /*
67 * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
68 * the exception vectors.
69 */
70 static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
71 #endif /* __ARM_KERNEL_PROTECT__ */
72
73 #define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)
74
75 #if KASAN
76 extern vm_offset_t shadow_pbase;
77 extern vm_offset_t shadow_ptop;
78 extern vm_offset_t physmap_vbase;
79 extern vm_offset_t physmap_vtop;
80 #endif
81
82 /*
83 * Denotes the end of xnu.
84 */
85 extern void *last_kernel_symbol;
86
87 extern void arm64_replace_bootstack(cpu_data_t*);
88 extern void PE_slide_devicetree(vm_offset_t);
89
90 /*
91 * KASLR parameters
92 */
93 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
94 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
95 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
96 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
97 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
98 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
99 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
100 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
101 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;
102
103 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
104 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
105 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
106 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
107 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
108 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
109 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
110 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;
111
112 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
113 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;
114
115 /* Used by <mach/arm/vm_param.h> */
116 SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
117 SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
118 SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
119 SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
120 SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;
121
122 /* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
123 * all kexts before the kernel. This is only for arm64 devices and looks
124 * something like the following:
125 * -- vmaddr order --
126 * 0xffffff8004004000 __PRELINK_TEXT
127 * 0xffffff8007004000 __TEXT (xnu)
128 * 0xffffff80075ec000 __DATA (xnu)
129 * 0xffffff80076dc000 __KLD (xnu)
130 * 0xffffff80076e0000 __LAST (xnu)
131 * 0xffffff80076e4000 __LINKEDIT (xnu)
132 * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
133 * 0xffffff800782c000 __PRELINK_INFO
134 * 0xffffff80078e4000 -- End of kernelcache
135 */
136
137 /* 24921709 - make XNU ready for KTRR
138 *
139 * Two possible kernel cache layouts, depending on which kcgen is being used.
140 * VAs increasing downwards.
141 * Old KCGEN:
142 *
143 * __PRELINK_TEXT
144 * __TEXT
145 * __DATA_CONST
146 * __TEXT_EXEC
147 * __KLD
148 * __LAST
149 * __DATA
150 * __PRELINK_DATA (expected empty)
151 * __LINKEDIT
152 * __PRELINK_INFO
153 *
154 * New kcgen:
155 *
156 * __PRELINK_TEXT <--- First KTRR (ReadOnly) segment
157 * __PLK_DATA_CONST
158 * __PLK_TEXT_EXEC
159 * __TEXT
160 * __DATA_CONST
161 * __TEXT_EXEC
162 * __KLD
163 * __LAST <--- Last KTRR (ReadOnly) segment
164 * __DATA
165 * __BOOTDATA (if present)
166 * __LINKEDIT
167 * __PRELINK_DATA (expected populated now)
168 * __PLK_LINKEDIT
169 * __PRELINK_INFO
170 *
171 */
172
173 vm_offset_t mem_size; /* Size of actual physical memory present
174 * minus any performance buffer and possibly
175 * limited by mem_limit in bytes */
176 uint64_t mem_actual; /* The "One True" physical memory size;
177 * actually, it's the highest physical
178 * address + 1 */
179 uint64_t max_mem; /* Size of physical memory (bytes), adjusted
180 * by maxmem */
181 uint64_t sane_size; /* Memory size to use for defaults
182 * calculations */
183 /* This no longer appears to be used; kill it? */
184 addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
185 * virtual address known
186 * to the VM system */
187
188 SECURITY_READ_ONLY_LATE(vm_offset_t) segEXTRADATA;
189 SECURITY_READ_ONLY_LATE(unsigned long) segSizeEXTRADATA;
190
191 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTTEXT;
192 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWEST;
193
194 SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
195 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;
196
197
198 SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
199 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;
200
201 SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTEXECB;
202 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXTEXEC;
203
204 SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
205 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;
206
207
208 SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB;
209 SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA;
210 extern vm_offset_t intstack_low_guard;
211 extern vm_offset_t intstack_high_guard;
212 extern vm_offset_t excepstack_high_guard;
213
214 SECURITY_READ_ONLY_LATE(static vm_offset_t) segLINKB;
215 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;
216
217 SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
218 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLD;
219 SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB;
220 SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST;
221
222 SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB;
223 SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT;
224
225 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB;
226 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;
227
228 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB;
229 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;
230
231 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB;
232 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;
233
234 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0;
235 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;
236
237 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB;
238 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;
239
240 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB;
241 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;
242
243 SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;
244
245 SECURITY_READ_ONLY_LATE(unsigned) PAGE_SHIFT_CONST;
246
247 SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
248 SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
249 SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
250 SECURITY_READ_ONLY_LATE(vm_offset_t) edata;
251
252 vm_offset_t alloc_ptpage(boolean_t map_static);
253 SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
254
255 /*
256 * Bootstrap the system enough to run with virtual memory.
257 * Map the kernel's code and data, and allocate the system page table.
258 * Page_size must already be set.
259 *
260 * Parameters:
261 * first_avail: first available physical page -
262 * after kernel page tables
263 * avail_start: PA of first physical page
264 * avail_end: PA of last physical page
265 */
266 SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail;
267 SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end;
268 SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start;
269 SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end;
270 SECURITY_READ_ONLY_LATE(pmap_paddr_t) real_avail_end;
271 SECURITY_READ_ONLY_LATE(unsigned long) real_phys_size;
272
273 #if __ARM_KERNEL_PROTECT__
274 extern void ExceptionVectorsBase;
275 extern void ExceptionVectorsEnd;
276 #endif /* __ARM_KERNEL_PROTECT__ */
277
278 typedef struct {
279 pmap_paddr_t pa;
280 vm_map_address_t va;
281 vm_size_t len;
282 } ptov_table_entry;
283
284 #define PTOV_TABLE_SIZE 8
285 SECURITY_READ_ONLY_LATE(static ptov_table_entry) ptov_table[PTOV_TABLE_SIZE];
286 SECURITY_READ_ONLY_LATE(static boolean_t) kva_active = FALSE;
287
288
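/*
 * phystokv translates a physical address to its kernel virtual address in the
 * physical aperture. Regions listed in ptov_table have had their physmap VA
 * slid independently; anything else falls back to the fixed
 * gVirtBase/gPhysBase offset.
 */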
289 vm_map_address_t
290 phystokv(pmap_paddr_t pa)
291 {
292 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
293 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len)))
294 return (pa - ptov_table[i].pa + ptov_table[i].va);
295 }
296 assertf((pa - gPhysBase) < real_phys_size, "%s: illegal PA: 0x%llx", __func__, (uint64_t)pa);
297 return (pa - gPhysBase + gVirtBase);
298 }
299
300 vm_map_address_t
301 phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
302 {
303 vm_size_t len;
304 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
305 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
306 len = ptov_table[i].len - (pa - ptov_table[i].pa);
307 if (*max_len > len)
308 *max_len = len;
309 return (pa - ptov_table[i].pa + ptov_table[i].va);
310 }
311 }
312 len = PAGE_SIZE - (pa & PAGE_MASK);
313 if (*max_len > len)
314 *max_len = len;
315 assertf((pa - gPhysBase) < real_phys_size, "%s: illegal PA: 0x%llx", __func__, (uint64_t)pa);
316 return (pa - gPhysBase + gVirtBase);
317 }
318
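/*
 * ml_static_vtop is the inverse of phystokv for statically mapped addresses:
 * it consults ptov_table first and falls back to the fixed offset translation.
 */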
319 vm_offset_t
320 ml_static_vtop(vm_offset_t va)
321 {
322 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
323 if ((va >= ptov_table[i].va) && (va < (ptov_table[i].va + ptov_table[i].len)))
324 return (va - ptov_table[i].va + ptov_table[i].pa);
325 }
326 assertf(((vm_address_t)(va) - gVirtBase) < gPhysSize, "%s: illegal VA: %p", __func__, (void*)va);
327 return ((vm_address_t)(va) - gVirtBase + gPhysBase);
328 }
329
330 /*
331 * This rounds the given address up to the nearest boundary for a PTE contiguous
332 * hint.
333 */
334 static vm_offset_t
335 round_up_pte_hint_address(vm_offset_t address)
336 {
337 vm_offset_t hint_size = ARM_PTE_SIZE << ARM_PTE_HINT_ENTRIES_SHIFT;
338 return ((address + (hint_size - 1)) & ~(hint_size - 1));
339 }
340
341 /* allocate a page for a page table: we support static and dynamic mappings.
342 *
343 * returns a virtual address for the allocated page
344 *
345 * for static mappings, we allocate from the region ropagetable_begin to ropagetable_end-1,
346 * which is defined in the DATA_CONST segment and will be protected RNX when arm_vm_prot_finalize runs.
347 *
348 * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
349 */
350
351 vm_offset_t alloc_ptpage(boolean_t map_static) {
352 vm_offset_t vaddr;
353
354 #if !(defined(KERNEL_INTEGRITY_KTRR))
355 map_static = FALSE;
356 #endif
357
358 if (!ropage_next) {
359 ropage_next = (vm_offset_t)&ropagetable_begin;
360 }
361
362 if (map_static) {
363 assert(ropage_next < (vm_offset_t)&ropagetable_end);
364
365 vaddr = ropage_next;
366 ropage_next += ARM_PGBYTES;
367
368 return vaddr;
369 } else {
370 vaddr = phystokv(avail_start);
371 avail_start += ARM_PGBYTES;
372
373 return vaddr;
374 }
375 }
376
377 #if DEBUG
378
379 void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out);
380
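/*
 * dump_kva_l2 walks a single L2 translation table, coalescing runs of entries
 * that map memory statically (block mappings, or L3 tables allocated from the
 * RO page table region) versus dynamically, printing each run and accumulating
 * the static/dynamic byte totals into rosz_out/rwsz_out.
 */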
381 void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out) {
382 unsigned int i;
383 boolean_t cur_ro, prev_ro = 0;
384 int start_entry = -1;
385 tt_entry_t cur, prev = 0;
386 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
387 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
388 boolean_t tt_static = kvtophys((vm_offset_t)tt) >= robegin &&
389 kvtophys((vm_offset_t)tt) < roend;
390
391 for(i=0; i<TTE_PGENTRIES; i++) {
392 int tte_type = tt[i] & ARM_TTE_TYPE_MASK;
393 cur = tt[i] & ARM_TTE_TABLE_MASK;
394
395 if (tt_static) {
396 /* addresses mapped by this entry are static if it is a block mapping,
397 * or the table was allocated from the RO page table region */
398 cur_ro = (tte_type == ARM_TTE_TYPE_BLOCK) || (cur >= robegin && cur < roend);
399 } else {
400 cur_ro = 0;
401 }
402
403 if ((cur == 0 && prev != 0) || (cur_ro != prev_ro && prev != 0)) { // falling edge
404 uintptr_t start,end,sz;
405
406 start = (uintptr_t)start_entry << ARM_TT_L2_SHIFT;
407 start += tt_base;
408 end = ((uintptr_t)i << ARM_TT_L2_SHIFT) - 1;
409 end += tt_base;
410
411 sz = end - start + 1;
412 printf("%*s0x%08x_%08x-0x%08x_%08x %s (%luMB)\n",
413 indent*4, "",
414 (uint32_t)(start >> 32),(uint32_t)start,
415 (uint32_t)(end >> 32),(uint32_t)end,
416 prev_ro ? "Static " : "Dynamic",
417 (sz >> 20));
418
419 if (prev_ro) {
420 *rosz_out += sz;
421 } else {
422 *rwsz_out += sz;
423 }
424 }
425
426 if ((prev == 0 && cur != 0) || cur_ro != prev_ro) { // rising edge: set start
427 start_entry = i;
428 }
429
430 prev = cur;
431 prev_ro = cur_ro;
432 }
433 }
434
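/*
 * dump_kva_space walks the root (L1) kernel translation table, printing the VA
 * range covered by each valid entry and whether its L2 table lives in the RO
 * page table region, then reports the total static/dynamic mapped sizes and
 * the number of page table pages consumed.
 */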
435 void dump_kva_space() {
436 uint64_t tot_rosz=0, tot_rwsz=0;
437 int ro_ptpages, rw_ptpages;
438 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
439 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
440 boolean_t root_static = kvtophys((vm_offset_t)cpu_tte) >= robegin &&
441 kvtophys((vm_offset_t)cpu_tte) < roend;
442 uint64_t kva_base = ~((1ULL << (64 - T1SZ_BOOT)) - 1);
443
444 printf("Root page table: %s\n", root_static ? "Static" : "Dynamic");
445
446 for(unsigned int i=0; i<TTE_PGENTRIES; i++) {
447 pmap_paddr_t cur;
448 boolean_t cur_ro;
449 uintptr_t start,end;
450 uint64_t rosz = 0, rwsz = 0;
451
452 if ((cpu_tte[i] & ARM_TTE_VALID) == 0)
453 continue;
454
455 cur = cpu_tte[i] & ARM_TTE_TABLE_MASK;
456 start = (uint64_t)i << ARM_TT_L1_SHIFT;
457 start = start + kva_base;
458 end = start + (ARM_TT_L1_SIZE - 1);
459 cur_ro = cur >= robegin && cur < roend;
460
461 printf("0x%08x_%08x-0x%08x_%08x %s\n",
462 (uint32_t)(start >> 32),(uint32_t)start,
463 (uint32_t)(end >> 32),(uint32_t)end,
464 cur_ro ? "Static " : "Dynamic");
465
466 dump_kva_l2(start, (tt_entry_t*)phystokv(cur), 1, &rosz, &rwsz);
467 tot_rosz += rosz;
468 tot_rwsz += rwsz;
469 }
470
471 printf("L2 Address space mapped: Static %lluMB Dynamic %lluMB Total %lluMB\n",
472 tot_rosz >> 20,
473 tot_rwsz >> 20,
474 (tot_rosz >> 20) + (tot_rwsz >> 20));
475
476 ro_ptpages = (int)((ropage_next - (vm_offset_t)&ropagetable_begin) >> ARM_PGSHIFT);
477 rw_ptpages = (int)(lowGlo.lgStaticSize >> ARM_PGSHIFT);
478 printf("Pages used: static %d dynamic %d\n", ro_ptpages, rw_ptpages);
479 }
480
481 #endif /* DEBUG */
482
483 #if __ARM_KERNEL_PROTECT__
484 /*
485 * arm_vm_map:
486 * root_ttp: The kernel virtual address for the root of the target page tables
487 * vaddr: The target virtual address
488 * pte: A page table entry value (may be ARM_PTE_EMPTY)
489 *
490 * This function installs pte at vaddr in root_ttp. Any page table pages needed
491 * to install pte will be allocated by this function.
492 */
493 static void
494 arm_vm_map(tt_entry_t * root_ttp, vm_offset_t vaddr, pt_entry_t pte)
495 {
496 vm_offset_t ptpage = 0;
497 tt_entry_t * ttp = root_ttp;
498
499 tt_entry_t * l1_ttep = NULL;
500 tt_entry_t l1_tte = 0;
501
502 tt_entry_t * l2_ttep = NULL;
503 tt_entry_t l2_tte = 0;
504 pt_entry_t * ptep = NULL;
505 pt_entry_t cpte = 0;
506
507 /*
508 * Walk the target page table to find the PTE for the given virtual
509 * address. Allocate any page table pages needed to do this.
510 */
511 l1_ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
512 l1_tte = *l1_ttep;
513
514 if (l1_tte == ARM_TTE_EMPTY) {
515 ptpage = alloc_ptpage(TRUE);
516 bzero((void *)ptpage, ARM_PGBYTES);
517 l1_tte = kvtophys(ptpage);
518 l1_tte &= ARM_TTE_TABLE_MASK;
519 l1_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
520 *l1_ttep = l1_tte;
521 ptpage = 0;
522 }
523
524 ttp = (tt_entry_t *)phystokv(l1_tte & ARM_TTE_TABLE_MASK);
525
526 l2_ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
527 l2_tte = *l2_ttep;
528
529 if (l2_tte == ARM_TTE_EMPTY) {
530 ptpage = alloc_ptpage(TRUE);
531 bzero((void *)ptpage, ARM_PGBYTES);
532 l2_tte = kvtophys(ptpage);
533 l2_tte &= ARM_TTE_TABLE_MASK;
534 l2_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
535 *l2_ttep = l2_tte;
536 ptpage = 0;
537 }
538
539 ttp = (tt_entry_t *)phystokv(l2_tte & ARM_TTE_TABLE_MASK);
540
541 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
542 cpte = *ptep;
543
544 /*
545 * If the existing PTE is not empty, then we are replacing a valid
546 * mapping.
547 */
548 if (cpte != ARM_PTE_EMPTY) {
549 panic("%s: cpte=%#llx is not empty, "
550 "vaddr=%#lx, pte=%#llx",
551 __FUNCTION__, cpte,
552 vaddr, pte);
553 }
554
555 *ptep = pte;
556 }
557
558 #endif // __ARM_KERNEL_PROTECT__
559
560 #if __ARM_KERNEL_PROTECT__
561
562 /*
563 * arm_vm_kernel_el0_map:
564 * vaddr: The target virtual address
565 * pte: A page table entry value (may be ARM_PTE_EMPTY)
566 *
567 * This function installs pte at vaddr for the EL0 kernel mappings.
568 */
569 static void
570 arm_vm_kernel_el0_map(vm_offset_t vaddr, pt_entry_t pte)
571 {
572 /* Calculate where vaddr will be in the EL1 kernel page tables. */
573 vm_offset_t kernel_pmap_vaddr = vaddr - ((ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) / 2ULL);
574 arm_vm_map(cpu_tte, kernel_pmap_vaddr, pte);
575 }
576
577 /*
578 * arm_vm_kernel_el1_map:
579 * vaddr: The target virtual address
580 * pte: A page table entry value (may be ARM_PTE_EMPTY)
581 *
582 * This function installs pte at vaddr for the EL1 kernel mappings.
583 */
584 static void
585 arm_vm_kernel_el1_map(vm_offset_t vaddr, pt_entry_t pte) {
586 arm_vm_map(cpu_tte, vaddr, pte);
587 }
588
589 /*
590 * arm_vm_kernel_pte:
591 * vaddr: The target virtual address
592 *
593 * This function returns the PTE value for the given vaddr from the kernel page
594 * tables. If the region has been block mapped, we return what an
595 * equivalent PTE value would be (as regards permissions and flags). We also
596 * remove the HINT bit (as we are not necessarily creating contiguous mappings).
597 */
598 static pt_entry_t
599 arm_vm_kernel_pte(vm_offset_t vaddr)
600 {
601 tt_entry_t * ttp = cpu_tte;
602 tt_entry_t * ttep = NULL;
603 tt_entry_t tte = 0;
604 pt_entry_t * ptep = NULL;
605 pt_entry_t pte = 0;
606
607 ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
608 tte = *ttep;
609
610 assert(tte & ARM_TTE_VALID);
611
612 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
613 /* This is a block mapping; return the equivalent PTE value. */
614 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
615 pte |= ARM_PTE_TYPE_VALID;
616 pte |= vaddr & ((ARM_TT_L1_SIZE - 1) & ARM_PTE_PAGE_MASK);
617 pte &= ~ARM_PTE_HINT_MASK;
618 return pte;
619 }
620
621 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
622 ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
623 tte = *ttep;
624
625 assert(tte & ARM_TTE_VALID);
626
627 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
628 /* This is a block mapping; return the equivalent PTE value. */
629 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
630 pte |= ARM_PTE_TYPE_VALID;
631 pte |= vaddr & ((ARM_TT_L2_SIZE - 1) & ARM_PTE_PAGE_MASK);
632 pte &= ~ARM_PTE_HINT_MASK;
633 return pte;
634 }
635
636 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
637
638 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
639 pte = *ptep;
640 pte &= ~ARM_PTE_HINT_MASK;
641 return pte;
642 }
643
644 /*
645 * arm_vm_prepare_kernel_el0_mappings:
646 * alloc_only: If true, only expand the page tables; do not copy PTE values
647 * from the EL1 kernel mappings.
648 *
649 * This function expands the kernel page tables to support the EL0 kernel
650 * mappings, and conditionally installs the PTE values for the EL0 kernel
651 * mappings (if alloc_only is false).
652 */
653 static void
654 arm_vm_prepare_kernel_el0_mappings(bool alloc_only)
655 {
656 pt_entry_t pte = 0;
657 vm_offset_t start = ((vm_offset_t)&ExceptionVectorsBase) & ~PAGE_MASK;
658 vm_offset_t end = (((vm_offset_t)&ExceptionVectorsEnd) + PAGE_MASK) & ~PAGE_MASK;
659 vm_offset_t cur = 0;
660 vm_offset_t cur_fixed = 0;
661
662 /* Expand for/map the exception vectors in the EL0 kernel mappings. */
663 for (cur = start, cur_fixed = ARM_KERNEL_PROTECT_EXCEPTION_START; cur < end; cur += ARM_PGBYTES, cur_fixed += ARM_PGBYTES) {
664 /*
665 * We map the exception vectors at a different address than that
666 * of the kernelcache to avoid sharing page table pages with the
667 * kernelcache (as this may cause issues with TLB caching of
668 * page table pages).
669 */
670 if (!alloc_only) {
671 pte = arm_vm_kernel_pte(cur);
672 }
673
674 arm_vm_kernel_el1_map(cur_fixed, pte);
675 arm_vm_kernel_el0_map(cur_fixed, pte);
676 }
677
678 __builtin_arm_dmb(DMB_ISH);
679 __builtin_arm_isb(ISB_SY);
680
681 if (!alloc_only) {
682 /*
683 * If we have created the alternate exception vector mappings,
684 * the boot CPU may now switch over to them.
685 */
686 set_vbar_el1(ARM_KERNEL_PROTECT_EXCEPTION_START);
687 __builtin_arm_isb(ISB_SY);
688 }
689 }
690
691 /*
692 * arm_vm_populate_kernel_el0_mappings:
693 *
694 * This function adds all required mappings to the EL0 kernel mappings.
695 */
696 static void
697 arm_vm_populate_kernel_el0_mappings(void)
698 {
699 arm_vm_prepare_kernel_el0_mappings(FALSE);
700 }
701
702 /*
703 * arm_vm_expand_kernel_el0_mappings:
704 *
705 * This function expands the kernel page tables to accommodate the EL0 kernel
706 * mappings.
707 */
708 static void
709 arm_vm_expand_kernel_el0_mappings(void)
710 {
711 arm_vm_prepare_kernel_el0_mappings(TRUE);
712 }
713 #endif /* __ARM_KERNEL_PROTECT__ */
714
715 #if defined(KERNEL_INTEGRITY_KTRR)
716 extern void bootstrap_instructions;
717
718 /*
719 * arm_replace_identity_map takes the V=P map that we construct in start.s
720 * and repurposes it in order to have it map only the page we need in order
721 * to turn on the MMU. This prevents us from running into issues where
722 * KTRR will cause us to fault on executable block mappings that cross the
723 * KTRR boundary.
724 */
725 static void arm_replace_identity_map(boot_args * args)
726 {
727 vm_offset_t addr;
728 pmap_paddr_t paddr;
729
730 pmap_paddr_t l1_ptp_phys = 0;
731 tt_entry_t *l1_ptp_virt = NULL;
732 tt_entry_t *tte1 = NULL;
733 pmap_paddr_t l2_ptp_phys = 0;
734 tt_entry_t *l2_ptp_virt = NULL;
735 tt_entry_t *tte2 = NULL;
736 pmap_paddr_t l3_ptp_phys = 0;
737 pt_entry_t *l3_ptp_virt = NULL;
738 pt_entry_t *ptep = NULL;
739
740 addr = ((vm_offset_t)&bootstrap_instructions) & ~ARM_PGMASK;
741 paddr = kvtophys(addr);
742
743 /*
744 * The V=P page tables (at the time this comment was written) start
745 * after the last bit of kernel data, and consist of 1 L1 page and 1 or
746 * more L2 pages.
747 * Grab references to those pages, and allocate an L3 page.
748 */
749 l1_ptp_phys = args->topOfKernelData;
750 l1_ptp_virt = (tt_entry_t *)phystokv(l1_ptp_phys);
751 tte1 = &l1_ptp_virt[L1_TABLE_INDEX(paddr)];
752
753 l2_ptp_virt = L2_TABLE_VA(tte1);
754 l2_ptp_phys = (*tte1) & ARM_TTE_TABLE_MASK;
755 tte2 = &l2_ptp_virt[L2_TABLE_INDEX(paddr)];
756
757 l3_ptp_virt = (pt_entry_t *)alloc_ptpage(FALSE);
758 l3_ptp_phys = kvtophys((vm_offset_t)l3_ptp_virt);
759 ptep = &l3_ptp_virt[L3_TABLE_INDEX(paddr)];
760
761 /*
762 * Replace the large V=P mapping with a mapping that provides only the
763 * mappings needed to turn on the MMU.
764 */
765
766 bzero(l1_ptp_virt, ARM_PGBYTES);
767 *tte1 = ARM_TTE_BOOT_TABLE | (l2_ptp_phys & ARM_TTE_TABLE_MASK);
768
769 bzero(l2_ptp_virt, ARM_PGBYTES);
770 *tte2 = ARM_TTE_BOOT_TABLE | (l3_ptp_phys & ARM_TTE_TABLE_MASK);
771
772 *ptep = (paddr & ARM_PTE_MASK) |
773 ARM_PTE_TYPE_VALID |
774 ARM_PTE_SH(SH_OUTER_MEMORY) |
775 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) |
776 ARM_PTE_AF |
777 ARM_PTE_AP(AP_RONA) |
778 ARM_PTE_NX;
779 }
780 #endif /* defined(KERNEL_INTEGRITY_KTRR) */
781
782 tt_entry_t *arm_kva_to_tte(vm_offset_t);
783
784 tt_entry_t *
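/*
 * arm_kva_to_tte returns a pointer to the L2 translation table entry covering
 * the given kernel virtual address, indexing first the root (L1) table and
 * then the L2 table it references. The caller must know that the relevant L1
 * entry is a valid table entry.
 */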
785 arm_kva_to_tte(vm_offset_t va)
786 {
787 tt_entry_t *tte1, *tte2;
788 tte1 = cpu_tte + L1_TABLE_INDEX(va);
789 tte2 = L2_TABLE_VA(tte1) + L2_TABLE_INDEX(va);
790
791 return tte2;
792 }
793
794
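/*
 * Granule flags for the arm_vm_page_granular_* routines: ALLOW_BLOCK permits
 * the use of L2 block mappings for fully covered, L2-aligned ranges, and
 * ALLOW_HINT permits setting the PTE contiguous hint on suitably aligned runs
 * of pages (subject to the use_contiguous_hint boot-arg).
 */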
795 #define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
796 #define ARM64_GRANULE_ALLOW_HINT (1 << 1)
797
798 /*
799 * arm_vm_page_granular_helper updates protections at the L3 level. It will (if
800 * necessary) allocate a page for the L3 table and update the corresponding L2
801 * entry. Then, it will iterate over the L3 table, updating protections as necessary.
802 * This expects to be invoked at L2-entry or sub-L2-entry granularity, so this should
803 * not be invoked from a context that does not do L2 iteration separately (basically,
804 * don't call this except from arm_vm_page_granular_prot).
805 *
806 * unsigned granule: 0 => force to page granule, or a combination of
807 * ARM64_GRANULE_* flags declared above.
808 */
809
810 static void
811 arm_vm_page_granular_helper(vm_offset_t start, vm_offset_t _end, vm_offset_t va, pmap_paddr_t pa_offset,
812 int pte_prot_APX, int pte_prot_XN, unsigned granule,
813 pt_entry_t **deferred_pte, pt_entry_t *deferred_ptmp)
814 {
815 if (va & ARM_TT_L2_OFFMASK) { /* ragged edge hanging over an ARM_TT_L2_SIZE boundary */
816 tt_entry_t *tte2;
817 tt_entry_t tmplate;
818 pmap_paddr_t pa;
819 pt_entry_t *ppte, *recursive_pte = NULL, ptmp, recursive_ptmp = 0;
820 addr64_t ppte_phys;
821 unsigned i;
822
823 va &= ~ARM_TT_L2_OFFMASK;
824 pa = va - gVirtBase + gPhysBase - pa_offset;
825
826 if (pa >= real_avail_end)
827 return;
828
829 tte2 = arm_kva_to_tte(va);
830
831 assert(_end >= va);
832 tmplate = *tte2;
833
834 if (ARM_TTE_TYPE_TABLE == (tmplate & ARM_TTE_TYPE_MASK)) {
835 /* pick up the existing page table. */
836 ppte = (pt_entry_t *)phystokv((tmplate & ARM_TTE_TABLE_MASK));
837 } else {
838 // TTE must be reincarnated with page level mappings.
839 ppte = (pt_entry_t*)alloc_ptpage(pa_offset == 0);
840 bzero(ppte, ARM_PGBYTES);
841 ppte_phys = kvtophys((vm_offset_t)ppte);
842
843 *tte2 = pa_to_tte(ppte_phys) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
844 }
845
846 vm_offset_t len = _end - va;
847 if ((pa + len) > real_avail_end)
848 _end -= (pa + len - real_avail_end);
849 assert((start - gVirtBase + gPhysBase - pa_offset) >= gPhysBase);
850
851 /* Round up to the nearest PAGE_SIZE boundary when creating mappings:
852 * PAGE_SIZE may be a multiple of ARM_PGBYTES, and we don't want to leave
853 * a ragged non-PAGE_SIZE-aligned edge. */
854 vm_offset_t rounded_end = round_page(_end);
855 /* Apply the desired protections to the specified page range */
856 for (i = 0; i <= (ARM_TT_L3_INDEX_MASK>>ARM_TT_L3_SHIFT); i++) {
857 if ((start <= va) && (va < rounded_end)) {
858
859 ptmp = pa | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE;
860 ptmp = ptmp | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
861 ptmp = ptmp | ARM_PTE_AP(pte_prot_APX);
862 ptmp = ptmp | ARM_PTE_NX;
863 #if __ARM_KERNEL_PROTECT__
864 ptmp = ptmp | ARM_PTE_NG;
865 #endif /* __ARM_KERNEL_PROTECT__ */
866
867 if (pte_prot_XN) {
868 ptmp = ptmp | ARM_PTE_PNX;
869 }
870
871 /*
872 * If we can, apply the contiguous hint to this range. The hint is
873 * applicable if the current address falls within a hint-sized range that will
874 * be fully covered by this mapping request.
875 */
876 if ((va >= round_up_pte_hint_address(start)) && (round_up_pte_hint_address(va + 1) <= _end) &&
877 (granule & ARM64_GRANULE_ALLOW_HINT) && use_contiguous_hint) {
878 assert((va & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1)) == ((pa & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1))));
879 ptmp |= ARM_PTE_HINT;
880 /* Do not attempt to reapply the hint bit to an already-active mapping.
881 * This very likely means we're attempting to change attributes on an already-active mapping,
882 * which violates the requirement of the hint bit.*/
883 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT));
884 }
885 /*
886 * Do not change the contiguous bit on an active mapping. Even in a single-threaded
887 * environment, it's possible for prefetch to produce a TLB conflict by trying to pull in
888 * a hint-sized entry on top of one or more existing page-sized entries. It's also useful
889 * to make sure we're not trying to unhint a sub-range of a larger hinted range, which
890 * could produce a later TLB conflict.
891 */
892 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT) || ((ppte[i] & ARM_PTE_HINT) == (ptmp & ARM_PTE_HINT)));
893
894 /*
895 * If we reach an entry that maps the current pte page, delay updating it until the very end.
896 * Otherwise we might end up making the PTE page read-only, leading to a fault later on in
897 * this function if we manage to outrun the TLB. This can happen on KTRR-enabled devices when
898 * marking segDATACONST read-only. Mappings for this region may straddle a PT page boundary,
899 * so we must also defer assignment of the following PTE. We will assume that if the region
900 * were to require one or more full L3 pages, it would instead use L2 blocks where possible,
901 * therefore only requiring at most one L3 page at the beginning and one at the end.
902 */
903 if (kva_active && ((pt_entry_t*)(phystokv(pa)) == ppte)) {
904 assert(recursive_pte == NULL);
905 assert(granule & ARM64_GRANULE_ALLOW_BLOCK);
906 recursive_pte = &ppte[i];
907 recursive_ptmp = ptmp;
908 } else if ((deferred_pte != NULL) && (&ppte[i] == &recursive_pte[1])) {
909 assert(*deferred_pte == NULL);
910 assert(deferred_ptmp != NULL);
911 *deferred_pte = &ppte[i];
912 *deferred_ptmp = ptmp;
913 } else {
914 ppte[i] = ptmp;
915 }
916 }
917
918 va += ARM_PGBYTES;
919 pa += ARM_PGBYTES;
920 }
921 if (recursive_pte != NULL)
922 *recursive_pte = recursive_ptmp;
923 }
924 }
925
926 /*
927 * arm_vm_page_granular_prot updates protections by iterating over the L2 entries and
928 * changing them. If a particular chunk necessitates L3 entries (for reasons of
929 * alignment or length, or an explicit request that the entry be fully expanded), we
930 * hand off to arm_vm_page_granular_helper to deal with the L3 chunk of the logic.
931 */
932 static void
933 arm_vm_page_granular_prot(vm_offset_t start, unsigned long size, pmap_paddr_t pa_offset,
934 int tte_prot_XN, int pte_prot_APX, int pte_prot_XN,
935 unsigned granule)
936 {
937 pt_entry_t *deferred_pte = NULL, deferred_ptmp = 0;
938 vm_offset_t _end = start + size;
939 vm_offset_t align_start = (start + ARM_TT_L2_OFFMASK) & ~ARM_TT_L2_OFFMASK;
940
941 if (size == 0x0UL)
942 return;
943
944 if (align_start > _end) {
945 arm_vm_page_granular_helper(start, _end, start, pa_offset, pte_prot_APX, pte_prot_XN, granule, NULL, NULL);
946 return;
947 }
948
949 arm_vm_page_granular_helper(start, align_start, start, pa_offset, pte_prot_APX, pte_prot_XN, granule, &deferred_pte, &deferred_ptmp);
950
951 while ((_end - align_start) >= ARM_TT_L2_SIZE) {
952 if (!(granule & ARM64_GRANULE_ALLOW_BLOCK)) {
953 arm_vm_page_granular_helper(align_start, align_start+ARM_TT_L2_SIZE, align_start + 1, pa_offset,
954 pte_prot_APX, pte_prot_XN, granule, NULL, NULL);
955 } else {
956 pmap_paddr_t pa = align_start - gVirtBase + gPhysBase - pa_offset;
957 assert((pa & ARM_TT_L2_OFFMASK) == 0);
958 tt_entry_t *tte2;
959 tt_entry_t tmplate;
960
961 tte2 = arm_kva_to_tte(align_start);
962
963 if ((pa >= gPhysBase) && (pa < real_avail_end)) {
964 tmplate = (pa & ARM_TTE_BLOCK_L2_MASK) | ARM_TTE_TYPE_BLOCK
965 | ARM_TTE_VALID | ARM_TTE_BLOCK_AF | ARM_TTE_BLOCK_NX
966 | ARM_TTE_BLOCK_AP(pte_prot_APX) | ARM_TTE_BLOCK_SH(SH_OUTER_MEMORY)
967 | ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
968
969 #if __ARM_KERNEL_PROTECT__
970 tmplate = tmplate | ARM_TTE_BLOCK_NG;
971 #endif /* __ARM_KERNEL_PROTECT__ */
972 if (tte_prot_XN)
973 tmplate = tmplate | ARM_TTE_BLOCK_PNX;
974
975 *tte2 = tmplate;
976 }
977 }
978 align_start += ARM_TT_L2_SIZE;
979 }
980
981 if (align_start < _end)
982 arm_vm_page_granular_helper(align_start, _end, _end, pa_offset, pte_prot_APX, pte_prot_XN, granule, &deferred_pte, &deferred_ptmp);
983
984 if (deferred_pte != NULL)
985 *deferred_pte = deferred_ptmp;
986 }
987
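/*
 * Convenience wrappers around arm_vm_page_granular_prot. The suffix encodes
 * the resulting kernel permissions: RNX = read-only, non-executable;
 * ROX = read-only, executable; RWNX = read-write, non-executable.
 */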
988 static inline void
989 arm_vm_page_granular_RNX(vm_offset_t start, unsigned long size, unsigned granule)
990 {
991 arm_vm_page_granular_prot(start, size, 0, 1, AP_RONA, 1, granule);
992 }
993
994 static inline void
995 arm_vm_page_granular_ROX(vm_offset_t start, unsigned long size, unsigned granule)
996 {
997 arm_vm_page_granular_prot(start, size, 0, 0, AP_RONA, 0, granule);
998 }
999
1000 static inline void
1001 arm_vm_page_granular_RWNX(vm_offset_t start, unsigned long size, unsigned granule)
1002 {
1003 arm_vm_page_granular_prot(start, size, 0, 1, AP_RWNA, 1, granule);
1004 }
1005
1006 /* used in the chosen/memory-map node, populated by iBoot. */
1007 typedef struct MemoryMapFileInfo {
1008 vm_offset_t paddr;
1009 size_t length;
1010 } MemoryMapFileInfo;
1011
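/*
 * arm_vm_prot_init establishes the initial protections on the kernelcache
 * segments and other statically mapped regions. Some of these protections are
 * deliberately loose during early boot and are tightened later in
 * arm_vm_prot_finalize().
 */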
1012 void
1013 arm_vm_prot_init(boot_args * args)
1014 {
1015
1016 segLOWESTTEXT = UINT64_MAX;
1017 if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) segLOWESTTEXT = segPRELINKTEXTB;
1018 assert(segSizeTEXT);
1019 if (segTEXTB < segLOWESTTEXT) segLOWESTTEXT = segTEXTB;
1020 assert(segLOWESTTEXT < UINT64_MAX);
1021
1022 segEXTRADATA = segLOWESTTEXT;
1023 segSizeEXTRADATA = 0;
1024
1025 segLOWEST = segLOWESTTEXT;
1026
1027 DTEntry memory_map;
1028 MemoryMapFileInfo *trustCacheRange;
1029 unsigned int trustCacheRangeSize;
1030 int err;
1031
1032 err = DTLookupEntry(NULL, "chosen/memory-map", &memory_map);
1033 assert(err == kSuccess);
1034
1035 err = DTGetProperty(memory_map, "TrustCache", (void**)&trustCacheRange, &trustCacheRangeSize);
1036 if (err == kSuccess) {
1037 assert(trustCacheRangeSize == sizeof(MemoryMapFileInfo));
1038
1039 segEXTRADATA = phystokv(trustCacheRange->paddr);
1040 segSizeEXTRADATA = trustCacheRange->length;
1041
1042 if (segEXTRADATA <= segLOWEST) {
1043 segLOWEST = segEXTRADATA;
1044 }
1045 #if !(DEBUG || DEVELOPMENT)
1046
1047
1048 else {
1049 panic("EXTRADATA is in an unexpected place: %#lx > %#lx", segEXTRADATA, segLOWEST);
1050 }
1051 #endif /* !(DEBUG || DEVELOPMENT) */
1052
1053 arm_vm_page_granular_RNX(segEXTRADATA, segSizeEXTRADATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1054
1055 }
1056
1057 /* Map coalesced kext TEXT segment RWNX for now */
1058 arm_vm_page_granular_RWNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions
1059
1060 /* Map coalesced kext DATA_CONST segment RWNX (could be empty) */
1061 arm_vm_page_granular_RWNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions
1062
1063 /* Map coalesced kext TEXT_EXEC segment RX (could be empty) */
1064 arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Refined in OSKext::readPrelinkedExtensions
1065
1066 /* If the new segments are not present, set the space between PRELINK_TEXT and xnu TEXT to RWNX;
1067 * otherwise we no longer expect any space between the coalesced kext read-only segments and xnu's read-only segments.
1068 */
1069 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
1070 if (segSizePRELINKTEXT) {
1071 arm_vm_page_granular_RWNX(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT),
1072 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1073 }
1074 } else {
1075 /*
1076 * If we have the new segments, we should still protect the gap between kext
1077 * read-only pages and kernel read-only pages, in the event that this gap
1078 * exists.
1079 */
1080 if ((segPLKDATACONSTB + segSizePLKDATACONST) < segTEXTB) {
1081 arm_vm_page_granular_RWNX(segPLKDATACONSTB + segSizePLKDATACONST, segTEXTB - (segPLKDATACONSTB + segSizePLKDATACONST),
1082 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1083 }
1084 }
1085
1086 /*
1087 * Protection on kernel text is loose here to allow shenanigans early on. These
1088 * protections are tightened in arm_vm_prot_finalize(). This is necessary because
1089 * we currently patch LowResetVectorBase in cpu.c.
1090 *
1091 * TEXT segment contains mach headers and other non-executable data. This will become RONX later.
1092 */
1093 arm_vm_page_granular_RNX(segTEXTB, segSizeTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1094
1095 /* Can DATACONST start out and stay RNX?
1096 * NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
1097 * Make RNX in prot_finalize
1098 */
1099 arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1100
1101 arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1102
1103
1104 /* DATA segment will remain RWNX */
1105 arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1106
1107 arm_vm_page_granular_RWNX(segBOOTDATAB, segSizeBOOTDATA, 0);
1108 arm_vm_page_granular_RNX((vm_offset_t)&intstack_low_guard, PAGE_MAX_SIZE, 0);
1109 arm_vm_page_granular_RNX((vm_offset_t)&intstack_high_guard, PAGE_MAX_SIZE, 0);
1110 arm_vm_page_granular_RNX((vm_offset_t)&excepstack_high_guard, PAGE_MAX_SIZE, 0);
1111
1112 arm_vm_page_granular_ROX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1113 arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1114 arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Coalesced kext LINKEDIT segment
1115 arm_vm_page_granular_ROX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK); // __LAST may be empty, but we cannot assume this
1116
1117 arm_vm_page_granular_RWNX(segPRELINKDATAB, segSizePRELINKDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Prelink __DATA for kexts (RW data)
1118
1119 if (segSizePLKLLVMCOV > 0)
1120 arm_vm_page_granular_RWNX(segPLKLLVMCOVB, segSizePLKLLVMCOV, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // LLVM code coverage data
1121
1122 arm_vm_page_granular_RWNX(segPRELINKINFOB, segSizePRELINKINFO, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* PreLinkInfoDictionary */
1123
1124 arm_vm_page_granular_RNX(phystokv(args->topOfKernelData), BOOTSTRAP_TABLE_SIZE, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Boot page tables; they should not be mutable.
1125 }
1126
1127 /*
1128 * return < 0 for a < b
1129 * 0 for a == b
1130 * > 0 for a > b
1131 */
1132 typedef int (*cmpfunc_t)(const void *a, const void *b);
1133
1134 extern void
1135 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
1136
1137 static int
1138 cmp_ptov_entries(const void *a, const void *b)
1139 {
1140 const ptov_table_entry *entry_a = a;
1141 const ptov_table_entry *entry_b = b;
1142 // Sort in descending order of segment length
1143 if (entry_a->len < entry_b->len)
1144 return 1;
1145 else if (entry_a->len > entry_b->len)
1146 return -1;
1147 else
1148 return 0;
1149 }
1150
1151 SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;
1152
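/* Round an address up to the next ARM_TT_TWIG_SIZE (L2 block) boundary. */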
1153 #define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))
1154
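/*
 * arm_vm_physmap_slide appends one entry to the temporary ptov table for the
 * physical range backing [orig_va, orig_va + len). The new physmap VA is
 * placed immediately after the previous entry (or at physmap_base for the
 * first entry); when block mappings are allowed, the VA is nudged so that it
 * shares the same L2 (twig) offset as the physical address. The requested
 * protections are then applied to the new range.
 */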
1155 static void
1156 arm_vm_physmap_slide(ptov_table_entry *temp_ptov_table, vm_map_address_t physmap_base, vm_map_address_t orig_va, vm_size_t len, int pte_prot_APX, unsigned granule)
1157 {
1158 pmap_paddr_t pa_offset;
1159
1160 assert(ptov_index < PTOV_TABLE_SIZE);
1161 assert((orig_va & ARM_PGMASK) == 0);
1162 temp_ptov_table[ptov_index].pa = orig_va - gVirtBase + gPhysBase;
1163 if (ptov_index == 0)
1164 temp_ptov_table[ptov_index].va = physmap_base;
1165 else
1166 temp_ptov_table[ptov_index].va = temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len;
1167 if (granule & ARM64_GRANULE_ALLOW_BLOCK) {
1168 vm_map_address_t orig_offset = temp_ptov_table[ptov_index].pa & ARM_TT_TWIG_OFFMASK;
1169 vm_map_address_t new_offset = temp_ptov_table[ptov_index].va & ARM_TT_TWIG_OFFMASK;
1170 if (new_offset < orig_offset)
1171 temp_ptov_table[ptov_index].va += (orig_offset - new_offset);
1172 else if (new_offset > orig_offset)
1173 temp_ptov_table[ptov_index].va = ROUND_TWIG(temp_ptov_table[ptov_index].va) + orig_offset;
1174 }
1175 assert((temp_ptov_table[ptov_index].va & ARM_PGMASK) == 0);
1176 temp_ptov_table[ptov_index].len = round_page(len);
1177 pa_offset = temp_ptov_table[ptov_index].va - orig_va;
1178 arm_vm_page_granular_prot(temp_ptov_table[ptov_index].va, temp_ptov_table[ptov_index].len, pa_offset, 1, pte_prot_APX, 1, granule);
1179 ++ptov_index;
1180 }
1181
1182
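/*
 * arm_vm_physmap_init builds the final ptov table: it slides the physmap
 * regions, applies RWNX protections to the device tree/boot-args region,
 * sorts the entries by descending length (the lookup is a linear scan), and
 * publishes the result in ptov_table.
 */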
1183 static void
1184 arm_vm_physmap_init(boot_args *args, vm_map_address_t physmap_base, vm_map_address_t dynamic_memory_begin __unused)
1185 {
1186 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1187 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1188
1189 // Will be handed back to VM layer through ml_static_mfree() in arm_vm_prot_finalize()
1190 arm_vm_physmap_slide(temp_ptov_table, physmap_base, gVirtBase, segLOWEST - gVirtBase, AP_RWNA,
1191 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1192
1193 arm_vm_page_granular_RWNX(end_kern, phystokv(args->topOfKernelData) - end_kern,
1194 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* Device Tree, RAM Disk (if present), bootArgs */
1195
1196 arm_vm_physmap_slide(temp_ptov_table, physmap_base, (args->topOfKernelData + BOOTSTRAP_TABLE_SIZE - gPhysBase + gVirtBase),
1197 real_avail_end - (args->topOfKernelData + BOOTSTRAP_TABLE_SIZE), AP_RWNA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // rest of physmem
1198
1199 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= dynamic_memory_begin);
1200
1201 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1202 // segments should be placed earliest in the table to optimize lookup performance.
1203 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1204
1205 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1206 }
1207
1208
1209 void
1210 arm_vm_prot_finalize(boot_args * args __unused)
1211 {
1212 /*
1213 * At this point, we are far enough along in the boot process that it will be
1214 * safe to free up all of the memory preceding the kernel. It may in fact
1215 * be safe to do this earlier.
1216 *
1217 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
1218 * as usable.
1219 */
1220
1221 /*
1222 * If the old-style PRELINK segment exists, free the memory before it and the memory between it and XNU's TEXT.
1223 * Otherwise we're dealing with a new-style kernel cache, so we should just free the
1224 * memory before the PRELINK_TEXT segment, since the remaining kext read-only data segments
1225 * should be immediately followed by XNU's TEXT segment.
1226 */
1227
1228 ml_static_mfree(phystokv(gPhysBase), segLOWEST - gVirtBase);
1229
1230 /*
1231 * KTRR support means we will be mucking with these pages and trying to
1232 * protect them; we cannot free the pages to the VM if we do this.
1233 */
1234 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
1235 /* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
1236 ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
1237 }
1238
1239 /* tighten permissions on kext read only data and code */
1240 arm_vm_page_granular_RNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK);
1241 arm_vm_page_granular_RNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1242
1243 cpu_stack_alloc(&BootCpuData);
1244 arm64_replace_bootstack(&BootCpuData);
1245 ml_static_mfree(phystokv(segBOOTDATAB - gVirtBase + gPhysBase), segSizeBOOTDATA);
1246
1247 #if __ARM_KERNEL_PROTECT__
1248 arm_vm_populate_kernel_el0_mappings();
1249 #endif /* __ARM_KERNEL_PROTECT__ */
1250
1251
1252 #if defined(KERNEL_INTEGRITY_KTRR)
1253 /*
1254 * __LAST,__pinst should no longer be executable.
1255 */
1256 arm_vm_page_granular_RNX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK);
1257
1258 /*
1259 * Must wait until all other region permissions are set before locking down DATA_CONST
1260 * as the kernel static page tables live in DATA_CONST on KTRR enabled systems
1261 * and will become immutable.
1262 */
1263 #endif
1264
1265 arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1266
1267 __builtin_arm_dsb(DSB_ISH);
1268 flush_mmu_tlb();
1269 }
1270
1271 #define TBI_USER 0x1
1272 #define TBI_KERNEL 0x2
1273
1274 boolean_t user_tbi = TRUE;
1275
1276 /*
1277 * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of
1278 * address accesses. It can be enabled separately for TTBR0 (user) and
1279 * TTBR1 (kernel). We enable it by default for user only, but allow both
1280 * to be controlled by the 'tbi' boot-arg.
1281 */
1282 static void
1283 set_tbi(void)
1284 {
1285 #if !__ARM_KERNEL_PROTECT__
1286 /* If we are not built with __ARM_KERNEL_PROTECT__, TBI can be turned
1287 * off with a boot-arg.
1288 */
1289 uint64_t old_tcr, new_tcr;
1290 int tbi = 0;
1291
1292 if (PE_parse_boot_argn("tbi", &tbi, sizeof(tbi)))
1293 user_tbi = ((tbi & TBI_USER) == TBI_USER);
1294 old_tcr = new_tcr = get_tcr();
1295 new_tcr |= (user_tbi) ? TCR_TBI0_TOPBYTE_IGNORED : 0;
1296
1297 #if !defined(HAS_APPLE_PAC)
1298 /*
1299 * arm_vm_init() runs after rebase_threaded_starts(), so enabling TBI1
1300 * at this point will break the computed pointer signatures. TBID1
1301 * could help mitigate this problem, but for now we'll just disable
1302 * kernel TBI if PAC is being used.
1303 */
1304 new_tcr |= (tbi & TBI_KERNEL) ? TCR_TBI1_TOPBYTE_IGNORED : 0;
1305 #endif
1306
1307 if (old_tcr != new_tcr) {
1308 set_tcr(new_tcr);
1309 sysreg_restore.tcr_el1 = new_tcr;
1310 }
1311 #endif /* !__ARM_KERNEL_PROTECT__ */
1312 }
1313
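/*
 * The base of the physical aperture (physmap) is slid by a random, page-aligned
 * offset of up to ARM64_PHYSMAP_SLIDE_RANGE bytes in arm_vm_init().
 */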
1314 #define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
1315 #define ARM64_PHYSMAP_SLIDE_MASK (ARM64_PHYSMAP_SLIDE_RANGE - 1)
1316
1317 void
1318 arm_vm_init(uint64_t memory_size, boot_args * args)
1319 {
1320 vm_map_address_t va_l1, va_l1_end;
1321 tt_entry_t *cpu_l1_tte;
1322 vm_map_address_t va_l2, va_l2_end;
1323 tt_entry_t *cpu_l2_tte;
1324 pmap_paddr_t boot_ttep;
1325 tt_entry_t *boot_tte;
1326 uint64_t mem_segments;
1327 vm_offset_t ptpage_vaddr;
1328 vm_map_address_t dynamic_memory_begin;
1329 vm_map_address_t physmap_base;
1330
1331
1332 /*
1333 * Get the virtual and physical memory base from boot_args.
1334 */
1335 gVirtBase = args->virtBase;
1336 gPhysBase = args->physBase;
1337 #if KASAN
1338 real_phys_size = args->memSize + (shadow_ptop - shadow_pbase);
1339 #else
1340 real_phys_size = args->memSize;
1341 #endif
1342 /*
1343 * Ensure the physical region we specify for the VM to manage ends on a
1344 * software page boundary. Note that the software page size (PAGE_SIZE)
1345 * may be a multiple of the hardware page size specified in ARM_PGBYTES.
1346 * We must round the reported memory size down to the nearest PAGE_SIZE
1347 * boundary to ensure the VM does not try to manage a page it does not
1348 * completely own. The KASAN shadow region, if present, is managed entirely
1349 * in units of the hardware page size and should not need similar treatment.
1350 */
1351 gPhysSize = mem_size = ((gPhysBase + args->memSize) & ~PAGE_MASK) - gPhysBase;
1352
1353 if ((memory_size != 0) && (mem_size > memory_size))
1354 mem_size = memory_size;
1355 if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 4))
1356 panic("Unsupported memory configuration %lx\n", mem_size);
1357
1358 physmap_base = phystokv(args->topOfKernelData) + BOOTSTRAP_TABLE_SIZE;
1359
1360 // Slide the physical aperture to a random page-aligned location within the slide range
1361 uint64_t physmap_slide = early_random() & ARM64_PHYSMAP_SLIDE_MASK & ~((uint64_t)PAGE_MASK);
1362 assert(physmap_slide < ARM64_PHYSMAP_SLIDE_RANGE);
1363
1364 physmap_base += physmap_slide;
1365
1366 static_memory_end = physmap_base + mem_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE); // worst possible case for block alignment
1367 #if KASAN
1368 /* add the KASAN stolen memory to the physmap */
1369 dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
1370 #else
1371 dynamic_memory_begin = static_memory_end;
1372 #endif
1373 if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS)
1374 panic("Unsupported memory configuration %lx\n", mem_size);
1375
1376 boot_ttep = args->topOfKernelData;
1377 boot_tte = (tt_entry_t *) phystokv(boot_ttep);
1378
1379 #if DEVELOPMENT || DEBUG
1380 /* Sanity check - assert that BOOTSTRAP_TABLE_SIZE is sufficiently large to
1381 * hold our bootstrap mappings for any possible slide */
1382 size_t bytes_mapped = dynamic_memory_begin - gVirtBase;
1383 size_t l1_entries = 1 + ((bytes_mapped + ARM_TT_L1_SIZE - 1) / ARM_TT_L1_SIZE);
1384 /* 1 L1 each for V=P and KVA, plus 1 page for each L2 */
1385 size_t pages_used = 2 * (l1_entries + 1);
1386 if (pages_used > BOOTSTRAP_TABLE_SIZE) {
1387 panic("BOOTSTRAP_TABLE_SIZE too small for memory config\n");
1388 }
1389 #endif
1390
1391 /*
1392 * TTBR0 L1, TTBR0 L2 - 1:1 bootstrap mapping.
1393 * TTBR1 L1, TTBR1 L2 - kernel mapping
1394 */
1395 avail_start = boot_ttep + BOOTSTRAP_TABLE_SIZE;
1396
1397 #if defined(KERNEL_INTEGRITY_KTRR)
1398 arm_replace_identity_map(args);
1399 #endif
1400
1401 /* Initialize invalid tte page */
1402 invalid_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1403 invalid_ttep = kvtophys((vm_offset_t)invalid_tte);
1404 bzero(invalid_tte, ARM_PGBYTES);
1405
1406 /*
1407 * Initialize l1 page table page
1408 */
1409 cpu_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1410 cpu_ttep = kvtophys((vm_offset_t)cpu_tte);
1411 bzero(cpu_tte, ARM_PGBYTES);
1412 avail_end = gPhysBase + mem_size;
1413 assert(!(avail_end & PAGE_MASK));
1414
1415 #if KASAN
1416 real_avail_end = gPhysBase + real_phys_size;
1417 #else
1418 real_avail_end = avail_end;
1419 #endif
1420
1421 /*
1422 * Initialize l1 and l2 page table pages :
1423 * map physical memory at the kernel base virtual address
1424 * cover the kernel dynamic address range section
1425 *
1426 * the so-called physical aperture should be statically mapped
1427 */
1428 va_l1 = gVirtBase;
1429 va_l1_end = dynamic_memory_begin;
1430 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1431
1432 while (va_l1 < va_l1_end) {
1433 if (*cpu_l1_tte == ARM_TTE_EMPTY) {
1434 /* Allocate a page and setup L1 Table TTE in L1 */
1435 ptpage_vaddr = alloc_ptpage(TRUE);
1436 *cpu_l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
1437 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1438 }
1439
1440 if ((va_l1 + ARM_TT_L1_SIZE) < va_l1) {
1441 /* If this is the last L1 entry, it must cover the last mapping. */
1442 break;
1443 }
1444
1445 va_l1 += ARM_TT_L1_SIZE;
1446 cpu_l1_tte++;
1447 }
1448
1449 #if __ARM_KERNEL_PROTECT__
1450 /* Expand the page tables to prepare for the EL0 mappings. */
1451 arm_vm_expand_kernel_el0_mappings();
1452 #endif /* __ARM_KERNEL_PROTECT__ */
1453
1454 /*
1455 * Now retrieve addresses for end, edata, and etext from MACH-O headers
1456 */
1457 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
1458 segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
1459 segPLKTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
1460 segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
1461 segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
1462 segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
1463 segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
1464
1465 segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
1466 segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
1467 segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
1468 segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
1469 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
1470 segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
1471 segPLKLINKEDITB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
1472 segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
1473
1474 (void) PE_parse_boot_argn("use_contiguous_hint", &use_contiguous_hint, sizeof(use_contiguous_hint));
1475 assert(segSizePRELINKTEXT < 0x03000000); /* 23355738 */
1476
1477 /* if one of the new segments is present, the other one better be as well */
1478 if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
1479 assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
1480 }
1481
1482 etext = (vm_offset_t) segTEXTB + segSizeTEXT;
1483 sdata = (vm_offset_t) segDATAB;
1484 edata = (vm_offset_t) segDATAB + segSizeDATA;
1485 end_kern = round_page(getlastaddr()); /* Force end to next page */
1486
1487 vm_set_page_size();
1488
1489 vm_kernel_base = segTEXTB;
1490 vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
1491 vm_kext_base = segPRELINKTEXTB;
1492 vm_kext_top = vm_kext_base + segSizePRELINKTEXT;
1493
1494 vm_prelink_stext = segPRELINKTEXTB;
1495 if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
1496 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
1497 } else {
1498 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
1499 }
1500 vm_prelink_sinfo = segPRELINKINFOB;
1501 vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
1502 vm_slinkedit = segLINKB;
1503 vm_elinkedit = segLINKB + segSizeLINK;
1504
1505 vm_prelink_sdata = segPRELINKDATAB;
1506 vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;
1507
1508 arm_vm_prot_init(args);
1509
1510
1511 /*
1512 * Initialize the page tables for the low globals:
1513 * cover this address range:
1514 * LOW_GLOBAL_BASE_ADDRESS + 2MB
1515 */
1516 va_l1 = va_l2 = LOW_GLOBAL_BASE_ADDRESS;
1517 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1518 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1519 ptpage_vaddr = alloc_ptpage(TRUE);
1520 *cpu_l2_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
1521 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1522
1523 /*
1524 * Initialize l2 page table pages :
1525 * cover this address range:
1526 * KERNEL_DYNAMIC_ADDR - VM_MAX_KERNEL_ADDRESS
1527 */
1528 va_l1 = dynamic_memory_begin;
1529 va_l1_end = VM_MAX_KERNEL_ADDRESS;
1530 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1531
1532 while (va_l1 < va_l1_end) {
1533 if (*cpu_l1_tte == ARM_TTE_EMPTY) {
1534 /* Allocate a page and setup L1 Table TTE in L1 */
1535 ptpage_vaddr = alloc_ptpage(TRUE);
1536 *cpu_l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
1537 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1538 }
1539
1540 if ((va_l1 + ARM_TT_L1_SIZE) < va_l1) {
1541 /* If this is the last L1 entry, it must cover the last mapping. */
1542 break;
1543 }
1544
1545 va_l1 += ARM_TT_L1_SIZE;
1546 cpu_l1_tte++;
1547 }
1548
1549 #if KASAN
1550 /* record the extent of the physmap */
1551 physmap_vbase = physmap_base;
1552 physmap_vtop = static_memory_end;
1553 kasan_init();
1554 #endif /* KASAN */
1555
1556 #if MONOTONIC
1557 mt_early_init();
1558 #endif /* MONOTONIC */
1559
1560 set_tbi();
1561
1562 arm_vm_physmap_init(args, physmap_base, dynamic_memory_begin);
1563 set_mmu_ttb_alternate(cpu_ttep & TTBR_BADDR_MASK);
1564
1565
1566 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
1567
1568 flush_mmu_tlb();
1569 kva_active = TRUE;
1570 // global table pointers may need to be different due to physical aperture remapping
1571 cpu_tte = (tt_entry_t*)(phystokv(cpu_ttep));
1572 invalid_tte = (tt_entry_t*)(phystokv(invalid_ttep));
1573
1574 sane_size = mem_size - (avail_start - gPhysBase);
1575 max_mem = mem_size;
1576 vm_kernel_slid_base = segLOWESTTEXT;
1577 vm_kernel_slid_top = vm_prelink_einfo;
1578 vm_kernel_slide = segTEXTB-VM_KERNEL_LINK_ADDRESS;
1579 vm_kernel_stext = segTEXTB;
1580 assert(segDATACONSTB == segTEXTB + segSizeTEXT);
1581 assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
1582 vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
1583
1584 dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);
1585 pmap_bootstrap(dynamic_memory_begin);
1586
1587 disable_preemption();
1588
1589 /*
1590 * Initialize l3 page table pages :
1591 * cover this address range:
1592 * 2MB + FrameBuffer size + 10MB for each 256MB segment
1593 */
1594
1595 mem_segments = (mem_size + 0x0FFFFFFF) >> 28;
1596
1597 va_l1 = dynamic_memory_begin;
1598 va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
1599 va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
1600 va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
1601
1602 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1603
1604 while (va_l1 < va_l1_end) {
1605
1606 va_l2 = va_l1;
1607
1608 if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) {
1609 /* If this is the last L1 entry, it must cover the last mapping. */
1610 va_l2_end = va_l1_end;
1611 } else {
1612 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end);
1613 }
1614
1615 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1616
1617 while (va_l2 < va_l2_end) {
1618 pt_entry_t * ptp;
1619 pmap_paddr_t ptp_phys;
1620
1621 /* Allocate a page and setup L3 Table TTE in L2 */
1622 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
1623 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
1624
1625 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE, TRUE);
1626
1627 *cpu_l2_tte = (pa_to_tte (ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
1628
1629 va_l2 += ARM_TT_L2_SIZE;
1630 cpu_l2_tte++;
1631 };
1632
1633 va_l1 = va_l2_end;
1634 cpu_l1_tte++;
1635 }
1636
1637 /*
1638 * Initialize l3 page table pages :
1639 * cover this address range:
1640 * (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VM_MAX_KERNEL_ADDRESS
1641 */
1642 va_l1 = VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK;
1643 va_l1_end = VM_MAX_KERNEL_ADDRESS;
1644
1645 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1646
1647 while (va_l1 < va_l1_end) {
1648
1649 va_l2 = va_l1;
1650
1651 if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) {
1652 /* If this is the last L1 entry, it must cover the last mapping. */
1653 va_l2_end = va_l1_end;
1654 } else {
1655 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end);
1656 }
1657
1658 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1659
1660 while (va_l2 < va_l2_end) {
1661 pt_entry_t * ptp;
1662 pmap_paddr_t ptp_phys;
1663
1664 /* Allocate a page and setup L3 Table TTE in L2 */
1665 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
1666 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
1667
1668 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE, TRUE);
1669
1670 *cpu_l2_tte = (pa_to_tte (ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
1671
1672 va_l2 += ARM_TT_L2_SIZE;
1673 cpu_l2_tte++;
1674 };
1675
1676 va_l1 = va_l2_end;
1677 cpu_l1_tte++;
1678 }
1679
1680 #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
1681 /*
1682 * In this configuration, the bootstrap mappings (arm_vm_init) and
1683 * the heap mappings occupy separate L1 regions. Explicitly set up
1684 * the heap L1 allocations here.
1685 */
1686 va_l1 = VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK;
1687 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1688
1689 while ((va_l1 >= (VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK)) && (va_l1 < VM_MAX_KERNEL_ADDRESS)) {
1690 /*
1691 * If the L1 entry has not yet been allocated, allocate it
1692 * now and treat it as a heap table.
1693 */
1694 if (*cpu_l1_tte == ARM_TTE_EMPTY) {
1695 tt_entry_t *new_tte = (tt_entry_t*)alloc_ptpage(FALSE);
1696 bzero(new_tte, ARM_PGBYTES);
1697 *cpu_l1_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
1698 }
1699
1700 cpu_l1_tte++;
1701 va_l1 += ARM_TT_L1_SIZE;
1702 }
1703 #endif
1704
1705 /*
1706 * Adjust avail_start so that the range that the VM owns
1707 * starts on a PAGE_SIZE aligned boundary.
1708 */
1709 avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;
1710
1711 first_avail = avail_start;
1712 patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData);
1713 enable_preemption();
1714 }
1715