]> git.saurik.com Git - apple/xnu.git/blame - osfmk/arm64/arm_vm_init.c
xnu-4903.231.4.tar.gz
[apple/xnu.git] / osfmk / arm64 / arm_vm_init.c
CommitLineData
5ba3f43e
A
1/*
2 * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <mach_debug.h>
30#include <mach_kdp.h>
31#include <debug.h>
32
33#include <mach/vm_types.h>
34#include <mach/vm_param.h>
35#include <kern/misc_protos.h>
36#include <kern/assert.h>
37#include <vm/vm_kern.h>
38#include <vm/vm_page.h>
39#include <vm/pmap.h>
40
d9a64523 41#include <arm/atomic.h>
5ba3f43e
A
42#include <arm64/proc_reg.h>
43#include <arm64/lowglobals.h>
44#include <arm/cpu_data_internal.h>
45#include <arm/misc_protos.h>
46#include <pexpert/arm64/boot.h>
d9a64523 47#include <pexpert/device_tree.h>
5ba3f43e
A
48
49#include <libkern/kernel_mach_header.h>
50#include <libkern/section_keywords.h>
51
d9a64523 52#include <san/kasan.h>
5c9f4661
A
53
54#if __ARM_KERNEL_PROTECT__
55/*
56 * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
57 * mappable space preceeding the kernel (as we unmap the kernel by cutting the
58 * range covered by TTBR1 in half). This must also cover the exception vectors.
59 */
60static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);
61
62/* The exception vectors and the kernel cannot share root TTEs. */
63static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);
64
65/*
66 * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
67 * the exception vectors.
68 */
69static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
70#endif /* __ARM_KERNEL_PROTECT__ */
71
d9a64523
A
72#define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)
73
5ba3f43e
A
74#if KASAN
75extern vm_offset_t shadow_pbase;
76extern vm_offset_t shadow_ptop;
77extern vm_offset_t physmap_vbase;
78extern vm_offset_t physmap_vtop;
79#endif
80
81/*
82 * Denotes the end of xnu.
83 */
84extern void *last_kernel_symbol;
85
d9a64523
A
86extern void arm64_replace_bootstack(cpu_data_t*);
87extern void PE_slide_devicetree(vm_offset_t);
88
5ba3f43e
A
89/*
90 * KASLR parameters
91 */
92SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
93SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
94SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
95SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
96SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
97SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
98SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
99SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
100SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;
101
102SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
103SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
104SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
105SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
106SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
107SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
108SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
109SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;
110
d9a64523
A
111SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
112SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;
113
5ba3f43e
A
114/* Used by <mach/arm/vm_param.h> */
115SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
116SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
117SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
d9a64523
A
118SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
119SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;
120
121/* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
122 * all kexts before the kernel. This is only for arm64 devices and looks
123 * something like the following:
124 * -- vmaddr order --
125 * 0xffffff8004004000 __PRELINK_TEXT
126 * 0xffffff8007004000 __TEXT (xnu)
127 * 0xffffff80075ec000 __DATA (xnu)
128 * 0xffffff80076dc000 __KLD (xnu)
129 * 0xffffff80076e0000 __LAST (xnu)
130 * 0xffffff80076e4000 __LINKEDIT (xnu)
131 * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
132 * 0xffffff800782c000 __PRELINK_INFO
133 * 0xffffff80078e4000 -- End of kernelcache
134 */
5ba3f43e 135
d9a64523
A
136/* 24921709 - make XNU ready for KTRR
137 *
138 * Two possible kernel cache layouts, depending on which kcgen is being used.
139 * VAs increasing downwards.
140 * Old KCGEN:
141 *
142 * __PRELINK_TEXT
143 * __TEXT
144 * __DATA_CONST
145 * __TEXT_EXEC
146 * __KLD
147 * __LAST
148 * __DATA
149 * __PRELINK_DATA (expected empty)
150 * __LINKEDIT
151 * __PRELINK_INFO
152 *
153 * New kcgen:
154 *
155 * __PRELINK_TEXT <--- First KTRR (ReadOnly) segment
156 * __PLK_DATA_CONST
157 * __PLK_TEXT_EXEC
158 * __TEXT
159 * __DATA_CONST
160 * __TEXT_EXEC
161 * __KLD
162 * __LAST <--- Last KTRR (ReadOnly) segment
163 * __DATA
164 * __BOOTDATA (if present)
165 * __LINKEDIT
166 * __PRELINK_DATA (expected populated now)
167 * __PLK_LINKEDIT
168 * __PRELINK_INFO
169 *
5ba3f43e 170 */
d9a64523 171
5ba3f43e
A
172vm_offset_t mem_size; /* Size of actual physical memory present
173 * minus any performance buffer and possibly
174 * limited by mem_limit in bytes */
175uint64_t mem_actual; /* The "One True" physical memory size
176 * actually, it's the highest physical
177 * address + 1 */
178uint64_t max_mem; /* Size of physical memory (bytes), adjusted
179 * by maxmem */
180uint64_t sane_size; /* Memory size to use for defaults
181 * calculations */
182/* This no longer appears to be used; kill it? */
183addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
184 * virtual address known
185 * to the VM system */
186
d9a64523
A
187SECURITY_READ_ONLY_LATE(vm_offset_t) segEXTRADATA;
188SECURITY_READ_ONLY_LATE(unsigned long) segSizeEXTRADATA;
189
190SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTTEXT;
191
5ba3f43e
A
192SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
193SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;
194
195
196SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
197SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;
198
199SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTEXECB;
200SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXTEXEC;
201
202SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
203SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;
204
205
d9a64523
A
206SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB;
207SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA;
208extern vm_offset_t intstack_low_guard;
209extern vm_offset_t intstack_high_guard;
210extern vm_offset_t excepstack_high_guard;
211
5ba3f43e
A
212SECURITY_READ_ONLY_LATE(static vm_offset_t) segLINKB;
213SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;
214
215SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
216SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLD;
d9a64523
A
217SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB;
218SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST;
5ba3f43e
A
219
220SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB;
221SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT;
222
223SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB;
224SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;
225
226SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB;
227SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;
228
229SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB;
230SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;
231
232SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0;
233SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;
234
235SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB;
236SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;
237
238SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB;
239SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;
240
241SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;
242
243SECURITY_READ_ONLY_LATE(unsigned) PAGE_SHIFT_CONST;
244
245SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
246SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
247SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
248SECURITY_READ_ONLY_LATE(vm_offset_t) edata;
249
250vm_offset_t alloc_ptpage(boolean_t map_static);
251SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
252
253/*
254 * Bootstrap the system enough to run with virtual memory.
255 * Map the kernel's code and data, and allocate the system page table.
256 * Page_size must already be set.
257 *
258 * Parameters:
259 * first_avail: first available physical page -
260 * after kernel page tables
261 * avail_start: PA of first physical page
262 * avail_end: PA of last physical page
263 */
264SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail;
265SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end;
266SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start;
267SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end;
d9a64523 268SECURITY_READ_ONLY_LATE(pmap_paddr_t) real_avail_end;
5ba3f43e 269
5c9f4661
A
270#if __ARM_KERNEL_PROTECT__
271extern void ExceptionVectorsBase;
272extern void ExceptionVectorsEnd;
273#endif /* __ARM_KERNEL_PROTECT__ */
274
5ba3f43e
A
275#if defined(KERNEL_INTEGRITY_KTRR)
276#if __ARM64_TWO_LEVEL_PMAP__
277/* We could support this configuration, but it adds memory overhead. */
278#error This configuration is not supported
279#endif
280#endif
281
d9a64523
A
282typedef struct {
283 pmap_paddr_t pa;
284 vm_map_address_t va;
285 vm_size_t len;
286} ptov_table_entry;
287
288#define PTOV_TABLE_SIZE 8
289SECURITY_READ_ONLY_LATE(static ptov_table_entry) ptov_table[PTOV_TABLE_SIZE];
290SECURITY_READ_ONLY_LATE(static boolean_t) kva_active = FALSE;
291
292
293vm_map_address_t
294phystokv(pmap_paddr_t pa)
295{
296 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
297 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len)))
298 return (pa - ptov_table[i].pa + ptov_table[i].va);
299 }
300 return (pa - gPhysBase + gVirtBase);
301}
302
303vm_map_address_t
304phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
305{
306 vm_size_t len;
307 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
308 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
309 len = ptov_table[i].len - (pa - ptov_table[i].pa);
310 if (*max_len > len)
311 *max_len = len;
312 return (pa - ptov_table[i].pa + ptov_table[i].va);
313 }
314 }
315 len = PAGE_SIZE - (pa & PAGE_MASK);
316 if (*max_len > len)
317 *max_len = len;
318 return (pa - gPhysBase + gVirtBase);
319}
320
321vm_offset_t
322ml_static_vtop(vm_offset_t va)
323{
324 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
325 if ((va >= ptov_table[i].va) && (va < (ptov_table[i].va + ptov_table[i].len)))
326 return (va - ptov_table[i].va + ptov_table[i].pa);
327 }
328 if (((vm_address_t)(va) - gVirtBase) >= gPhysSize)
329 panic("ml_static_vtop(): illegal VA: %p\n", (void*)va);
330 return ((vm_address_t)(va) - gVirtBase + gPhysBase);
331}
332
5ba3f43e
A
333/*
334 * This rounds the given address up to the nearest boundary for a PTE contiguous
335 * hint.
336 */
337static vm_offset_t
338round_up_pte_hint_address(vm_offset_t address)
339{
340 vm_offset_t hint_size = ARM_PTE_SIZE << ARM_PTE_HINT_ENTRIES_SHIFT;
341 return ((address + (hint_size - 1)) & ~(hint_size - 1));
342}
343
344/* allocate a page for a page table: we support static and dynamic mappings.
345 *
346 * returns a virtual address for the allocated page
347 *
348 * for static mappings, we allocate from the region ropagetable_begin to ro_pagetable_end-1,
349 * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
350 *
351 * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
352 */
353
354vm_offset_t alloc_ptpage(boolean_t map_static) {
355 vm_offset_t vaddr;
356
357#if !(defined(KERNEL_INTEGRITY_KTRR))
358 map_static = FALSE;
359#endif
360
361 if (!ropage_next) {
362 ropage_next = (vm_offset_t)&ropagetable_begin;
363 }
364
365 if (map_static) {
366 assert(ropage_next < (vm_offset_t)&ropagetable_end);
367
368 vaddr = ropage_next;
369 ropage_next += ARM_PGBYTES;
370
371 return vaddr;
372 } else {
373 vaddr = phystokv(avail_start);
374 avail_start += ARM_PGBYTES;
375
376 return vaddr;
377 }
378}
379
380#if DEBUG
381
382void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out);
383
384void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out) {
385 unsigned int i;
386 boolean_t cur_ro, prev_ro = 0;
387 int start_entry = -1;
388 tt_entry_t cur, prev = 0;
389 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
390 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
391 boolean_t tt_static = kvtophys((vm_offset_t)tt) >= robegin &&
392 kvtophys((vm_offset_t)tt) < roend;
393
394 for(i=0; i<TTE_PGENTRIES; i++) {
395 int tte_type = tt[i] & ARM_TTE_TYPE_MASK;
396 cur = tt[i] & ARM_TTE_TABLE_MASK;
397
398 if (tt_static) {
399 /* addresses mapped by this entry are static if it is a block mapping,
400 * or the table was allocated from the RO page table region */
401 cur_ro = (tte_type == ARM_TTE_TYPE_BLOCK) || (cur >= robegin && cur < roend);
402 } else {
403 cur_ro = 0;
404 }
405
406 if ((cur == 0 && prev != 0) || (cur_ro != prev_ro && prev != 0)) { // falling edge
407 uintptr_t start,end,sz;
408
409 start = (uintptr_t)start_entry << ARM_TT_L2_SHIFT;
410 start += tt_base;
411 end = ((uintptr_t)i << ARM_TT_L2_SHIFT) - 1;
412 end += tt_base;
413
414 sz = end - start + 1;
415 printf("%*s0x%08x_%08x-0x%08x_%08x %s (%luMB)\n",
416 indent*4, "",
417 (uint32_t)(start >> 32),(uint32_t)start,
418 (uint32_t)(end >> 32),(uint32_t)end,
419 prev_ro ? "Static " : "Dynamic",
420 (sz >> 20));
421
422 if (prev_ro) {
423 *rosz_out += sz;
424 } else {
425 *rwsz_out += sz;
426 }
427 }
428
429 if ((prev == 0 && cur != 0) || cur_ro != prev_ro) { // rising edge: set start
430 start_entry = i;
431 }
432
433 prev = cur;
434 prev_ro = cur_ro;
435 }
436}
437
438void dump_kva_space() {
439 uint64_t tot_rosz=0, tot_rwsz=0;
440 int ro_ptpages, rw_ptpages;
441 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
442 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
443 boolean_t root_static = kvtophys((vm_offset_t)cpu_tte) >= robegin &&
444 kvtophys((vm_offset_t)cpu_tte) < roend;
445 uint64_t kva_base = ~((1ULL << (64 - T1SZ_BOOT)) - 1);
446
447 printf("Root page table: %s\n", root_static ? "Static" : "Dynamic");
448
449#if !__ARM64_TWO_LEVEL_PMAP__
450 for(unsigned int i=0; i<TTE_PGENTRIES; i++) {
451 pmap_paddr_t cur;
452 boolean_t cur_ro;
453 uintptr_t start,end;
454 uint64_t rosz = 0, rwsz = 0;
455
456 if ((cpu_tte[i] & ARM_TTE_VALID) == 0)
457 continue;
458
459 cur = cpu_tte[i] & ARM_TTE_TABLE_MASK;
460 start = (uint64_t)i << ARM_TT_L1_SHIFT;
461 start = start + kva_base;
462 end = start + (ARM_TT_L1_SIZE - 1);
463 cur_ro = cur >= robegin && cur < roend;
464
465 printf("0x%08x_%08x-0x%08x_%08x %s\n",
466 (uint32_t)(start >> 32),(uint32_t)start,
467 (uint32_t)(end >> 32),(uint32_t)end,
468 cur_ro ? "Static " : "Dynamic");
469
470 dump_kva_l2(start, (tt_entry_t*)phystokv(cur), 1, &rosz, &rwsz);
471 tot_rosz += rosz;
472 tot_rwsz += rwsz;
473 }
474#else
475 dump_kva_l2(kva_base, cpu_tte, 0, &tot_rosz, &tot_rwsz);
476#endif /* !_ARM64_TWO_LEVEL_PMAP__ */
477
478 printf("L2 Address space mapped: Static %lluMB Dynamic %lluMB Total %lluMB\n",
479 tot_rosz >> 20,
480 tot_rwsz >> 20,
481 (tot_rosz >> 20) + (tot_rwsz >> 20));
482
483 ro_ptpages = (int)((ropage_next - (vm_offset_t)&ropagetable_begin) >> ARM_PGSHIFT);
484 rw_ptpages = (int)(lowGlo.lgStaticSize >> ARM_PGSHIFT);
485 printf("Pages used: static %d dynamic %d\n", ro_ptpages, rw_ptpages);
486}
487
488#endif /* DEBUG */
489
5c9f4661
A
490#if __ARM_KERNEL_PROTECT__
491/*
492 * arm_vm_map:
493 * root_ttp: The kernel virtual address for the root of the target page tables
494 * vaddr: The target virtual address
495 * pte: A page table entry value (may be ARM_PTE_EMPTY)
496 *
497 * This function installs pte at vaddr in root_ttp. Any page table pages needed
498 * to install pte will be allocated by this function.
499 */
500static void
501arm_vm_map(tt_entry_t * root_ttp, vm_offset_t vaddr, pt_entry_t pte)
502{
503 vm_offset_t ptpage = 0;
504 tt_entry_t * ttp = root_ttp;
505
506#if !__ARM64_TWO_LEVEL_PMAP__
507 tt_entry_t * l1_ttep = NULL;
508 tt_entry_t l1_tte = 0;
509#endif
510
511 tt_entry_t * l2_ttep = NULL;
512 tt_entry_t l2_tte = 0;
513 pt_entry_t * ptep = NULL;
514 pt_entry_t cpte = 0;
515
516 /*
517 * Walk the target page table to find the PTE for the given virtual
518 * address. Allocate any page table pages needed to do this.
519 */
520#if !__ARM64_TWO_LEVEL_PMAP__
521 l1_ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
522 l1_tte = *l1_ttep;
523
524 if (l1_tte == ARM_TTE_EMPTY) {
525 ptpage = alloc_ptpage(TRUE);
526 bzero((void *)ptpage, ARM_PGBYTES);
527 l1_tte = kvtophys(ptpage);
528 l1_tte &= ARM_TTE_TABLE_MASK;
529 l1_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
530 *l1_ttep = l1_tte;
531 ptpage = 0;
532 }
533
534 ttp = (tt_entry_t *)phystokv(l1_tte & ARM_TTE_TABLE_MASK);
535#endif
536
537 l2_ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
538 l2_tte = *l2_ttep;
539
540 if (l2_tte == ARM_TTE_EMPTY) {
541 ptpage = alloc_ptpage(TRUE);
542 bzero((void *)ptpage, ARM_PGBYTES);
543 l2_tte = kvtophys(ptpage);
544 l2_tte &= ARM_TTE_TABLE_MASK;
545 l2_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
546 *l2_ttep = l2_tte;
547 ptpage = 0;
548 }
549
550 ttp = (tt_entry_t *)phystokv(l2_tte & ARM_TTE_TABLE_MASK);
551
552 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
553 cpte = *ptep;
554
555 /*
556 * If the existing PTE is not empty, then we are replacing a valid
557 * mapping.
558 */
559 if (cpte != ARM_PTE_EMPTY) {
560 panic("%s: cpte=%#llx is not empty, "
561 "vaddr=%#lx, pte=%#llx",
562 __FUNCTION__, cpte,
563 vaddr, pte);
564 }
565
566 *ptep = pte;
567}
568
569/*
570 * arm_vm_kernel_el0_map:
571 * vaddr: The target virtual address
572 * pte: A page table entry value (may be ARM_PTE_EMPTY)
573 *
574 * This function installs pte at vaddr for the EL0 kernel mappings.
575 */
576static void
577arm_vm_kernel_el0_map(vm_offset_t vaddr, pt_entry_t pte)
578{
579 /* Calculate where vaddr will be in the EL1 kernel page tables. */
580 vm_offset_t kernel_pmap_vaddr = vaddr - ((ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) / 2ULL);
581 arm_vm_map(cpu_tte, kernel_pmap_vaddr, pte);
582}
583
584/*
585 * arm_vm_kernel_el1_map:
586 * vaddr: The target virtual address
587 * pte: A page table entry value (may be ARM_PTE_EMPTY)
588 *
589 * This function installs pte at vaddr for the EL1 kernel mappings.
590 */
591static void
592arm_vm_kernel_el1_map(vm_offset_t vaddr, pt_entry_t pte) {
593 arm_vm_map(cpu_tte, vaddr, pte);
594}
595
596/*
597 * arm_vm_kernel_pte:
598 * vaddr: The target virtual address
599 *
600 * This function returns the PTE value for the given vaddr from the kernel page
601 * tables. If the region has been been block mapped, we return what an
602 * equivalent PTE value would be (as regards permissions and flags). We also
603 * remove the HINT bit (as we are not necessarily creating contiguous mappings.
604 */
605static pt_entry_t
606arm_vm_kernel_pte(vm_offset_t vaddr)
607{
608 tt_entry_t * ttp = cpu_tte;
609 tt_entry_t * ttep = NULL;
610 tt_entry_t tte = 0;
611 pt_entry_t * ptep = NULL;
612 pt_entry_t pte = 0;
613
614#if !__ARM64_TWO_LEVEL_PMAP__
615 ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
616 tte = *ttep;
617
618 assert(tte & ARM_TTE_VALID);
619
620 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
621 /* This is a block mapping; return the equivalent PTE value. */
622 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
623 pte |= ARM_PTE_TYPE_VALID;
624 pte |= vaddr & ((ARM_TT_L1_SIZE - 1) & ARM_PTE_PAGE_MASK);
625 pte &= ~ARM_PTE_HINT_MASK;
626 return pte;
627 }
628
629 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
630#endif
631 ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
632 tte = *ttep;
633
634 assert(tte & ARM_TTE_VALID);
635
636 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
637 /* This is a block mapping; return the equivalent PTE value. */
638 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
639 pte |= ARM_PTE_TYPE_VALID;
640 pte |= vaddr & ((ARM_TT_L2_SIZE - 1) & ARM_PTE_PAGE_MASK);
641 pte &= ~ARM_PTE_HINT_MASK;
642 return pte;
643 }
644
645 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
646
647 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
648 pte = *ptep;
649 pte &= ~ARM_PTE_HINT_MASK;
650 return pte;
651}
652
653/*
654 * arm_vm_prepare_kernel_el0_mappings:
655 * alloc_only: Indicates if PTE values should be copied from the EL1 kernel
656 * mappings.
657 *
658 * This function expands the kernel page tables to support the EL0 kernel
659 * mappings, and conditionally installs the PTE values for the EL0 kernel
660 * mappings (if alloc_only is false).
661 */
662static void
663arm_vm_prepare_kernel_el0_mappings(bool alloc_only)
664{
665 pt_entry_t pte = 0;
666 vm_offset_t start = ((vm_offset_t)&ExceptionVectorsBase) & ~PAGE_MASK;
667 vm_offset_t end = (((vm_offset_t)&ExceptionVectorsEnd) + PAGE_MASK) & ~PAGE_MASK;
668 vm_offset_t cur = 0;
669 vm_offset_t cur_fixed = 0;
670
671 /* Expand for/map the exceptions vectors in the EL0 kernel mappings. */
672 for (cur = start, cur_fixed = ARM_KERNEL_PROTECT_EXCEPTION_START; cur < end; cur += ARM_PGBYTES, cur_fixed += ARM_PGBYTES) {
673 /*
674 * We map the exception vectors at a different address than that
675 * of the kernelcache to avoid sharing page table pages with the
676 * kernelcache (as this may cause issues with TLB caching of
677 * page table pages.
678 */
679 if (!alloc_only) {
680 pte = arm_vm_kernel_pte(cur);
681 }
682
683 arm_vm_kernel_el1_map(cur_fixed, pte);
684 arm_vm_kernel_el0_map(cur_fixed, pte);
685 }
686
687 __builtin_arm_dmb(DMB_ISH);
688 __builtin_arm_isb(ISB_SY);
689
690 if (!alloc_only) {
691 /*
692 * If we have created the alternate exception vector mappings,
693 * the boot CPU may now switch over to them.
694 */
695 set_vbar_el1(ARM_KERNEL_PROTECT_EXCEPTION_START);
696 __builtin_arm_isb(ISB_SY);
697 }
698}
699
700/*
701 * arm_vm_populate_kernel_el0_mappings:
702 *
703 * This function adds all required mappings to the EL0 kernel mappings.
704 */
705static void
706arm_vm_populate_kernel_el0_mappings(void)
707{
708 arm_vm_prepare_kernel_el0_mappings(FALSE);
709}
710
711/*
712 * arm_vm_expand_kernel_el0_mappings:
713 *
714 * This function expands the kernel page tables to accomodate the EL0 kernel
715 * mappings.
716 */
717static void
718arm_vm_expand_kernel_el0_mappings(void)
719{
720 arm_vm_prepare_kernel_el0_mappings(TRUE);
721}
722#endif /* __ARM_KERNEL_PROTECT__ */
723
5ba3f43e
A
724#if defined(KERNEL_INTEGRITY_KTRR)
725extern void bootstrap_instructions;
726
727/*
728 * arm_replace_identity_map takes the V=P map that we construct in start.s
729 * and repurposes it in order to have it map only the page we need in order
730 * to turn on the MMU. This prevents us from running into issues where
731 * KTRR will cause us to fault on executable block mappings that cross the
732 * KTRR boundary.
733 */
734static void arm_replace_identity_map(boot_args * args)
735{
736 vm_offset_t addr;
737 pmap_paddr_t paddr;
738
739#if !__ARM64_TWO_LEVEL_PMAP__
740 pmap_paddr_t l1_ptp_phys = 0;
741 tt_entry_t *l1_ptp_virt = NULL;
742 tt_entry_t *tte1 = NULL;
743#endif
744 pmap_paddr_t l2_ptp_phys = 0;
745 tt_entry_t *l2_ptp_virt = NULL;
746 tt_entry_t *tte2 = NULL;
747 pmap_paddr_t l3_ptp_phys = 0;
748 pt_entry_t *l3_ptp_virt = NULL;
749 pt_entry_t *ptep = NULL;
750
751 addr = ((vm_offset_t)&bootstrap_instructions) & ~ARM_PGMASK;
752 paddr = kvtophys(addr);
753
754 /*
755 * The V=P page tables (at the time this comment was written) start
d9a64523
A
756 * after the last bit of kernel data, and consist of 1 L1 page and 1 or
757 * more L2 pages.
5ba3f43e
A
758 * Grab references to those pages, and allocate an L3 page.
759 */
5ba3f43e
A
760 l1_ptp_phys = args->topOfKernelData;
761 l1_ptp_virt = (tt_entry_t *)phystokv(l1_ptp_phys);
d9a64523 762 tte1 = &l1_ptp_virt[L1_TABLE_INDEX(paddr)];
5ba3f43e 763
d9a64523
A
764 l2_ptp_virt = L2_TABLE_VA(tte1);
765 l2_ptp_phys = (*tte1) & ARM_TTE_TABLE_MASK;
766 tte2 = &l2_ptp_virt[L2_TABLE_INDEX(paddr)];
5ba3f43e
A
767
768 l3_ptp_virt = (pt_entry_t *)alloc_ptpage(FALSE);
769 l3_ptp_phys = kvtophys((vm_offset_t)l3_ptp_virt);
d9a64523 770 ptep = &l3_ptp_virt[L3_TABLE_INDEX(paddr)];
5ba3f43e
A
771
772 /*
773 * Replace the large V=P mapping with a mapping that provides only the
774 * mappings needed to turn on the MMU.
775 */
d9a64523 776
5ba3f43e
A
777 bzero(l1_ptp_virt, ARM_PGBYTES);
778 *tte1 = ARM_TTE_BOOT_TABLE | (l2_ptp_phys & ARM_TTE_TABLE_MASK);
d9a64523 779
5ba3f43e
A
780 bzero(l2_ptp_virt, ARM_PGBYTES);
781 *tte2 = ARM_TTE_BOOT_TABLE | (l3_ptp_phys & ARM_TTE_TABLE_MASK);
782
783 *ptep = (paddr & ARM_PTE_MASK) |
784 ARM_PTE_TYPE_VALID |
785 ARM_PTE_SH(SH_OUTER_MEMORY) |
786 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) |
787 ARM_PTE_AF |
788 ARM_PTE_AP(AP_RONA) |
789 ARM_PTE_NX;
790}
791#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
792
d9a64523
A
793tt_entry_t *arm_kva_to_tte(vm_offset_t);
794
795tt_entry_t *
796arm_kva_to_tte(vm_offset_t va)
797{
798#if __ARM64_TWO_LEVEL_PMAP__
799 tt_entry_t *tte2;
800 tte2 = cpu_tte + L2_TABLE_INDEX(va);
801#else
802 tt_entry_t *tte1, *tte2;
803 tte1 = cpu_tte + L1_TABLE_INDEX(va);
804 tte2 = L2_TABLE_VA(tte1) + L2_TABLE_INDEX(va);
805#endif
806 return tte2;
807}
808
809
5ba3f43e
A
810/*
811 * arm_vm_page_granular_helper updates protections at the L3 level. It will (if
812 * neccessary) allocate a page for the L3 table and update the corresponding L2
813 * entry. Then, it will iterate over the L3 table, updating protections as necessary.
814 * This expects to be invoked on a L2 entry or sub L2 entry granularity, so this should
815 * not be invoked from a context that does not do L2 iteration separately (basically,
816 * don't call this except from arm_vm_page_granular_prot).
d9a64523
A
817 *
818 * bool force_page_granule: true: will force page level mappings for this entry
819 * false: will try to use block level mappings
5ba3f43e 820 */
d9a64523 821
5ba3f43e 822static void
d9a64523
A
823arm_vm_page_granular_helper(vm_offset_t start, vm_offset_t _end, vm_offset_t va, pmap_paddr_t pa_offset,
824 int pte_prot_APX, int pte_prot_XN, bool force_page_granule,
5ba3f43e
A
825 pt_entry_t **deferred_pte, pt_entry_t *deferred_ptmp)
826{
827 if (va & ARM_TT_L2_OFFMASK) { /* ragged edge hanging over a ARM_TT_L2_SIZE boundary */
5ba3f43e 828 tt_entry_t *tte2;
5ba3f43e
A
829 tt_entry_t tmplate;
830 pmap_paddr_t pa;
831 pt_entry_t *ppte, *recursive_pte = NULL, ptmp, recursive_ptmp = 0;
832 addr64_t ppte_phys;
833 unsigned i;
834
835 va &= ~ARM_TT_L2_OFFMASK;
d9a64523 836 pa = va - gVirtBase + gPhysBase - pa_offset;
5ba3f43e 837
d9a64523
A
838 if (pa >= real_avail_end)
839 return;
5ba3f43e 840
d9a64523
A
841 tte2 = arm_kva_to_tte(va);
842
843 assert(_end >= va);
5ba3f43e
A
844 tmplate = *tte2;
845
846 if (ARM_TTE_TYPE_TABLE == (tmplate & ARM_TTE_TYPE_MASK)) {
847 /* pick up the existing page table. */
848 ppte = (pt_entry_t *)phystokv((tmplate & ARM_TTE_TABLE_MASK));
849 } else {
d9a64523
A
850 // TTE must be reincarnated with page level mappings.
851 ppte = (pt_entry_t*)alloc_ptpage(pa_offset == 0);
852 bzero(ppte, ARM_PGBYTES);
5ba3f43e
A
853 ppte_phys = kvtophys((vm_offset_t)ppte);
854
d9a64523 855 *tte2 = pa_to_tte(ppte_phys) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
5ba3f43e
A
856 }
857
d9a64523
A
858 vm_offset_t len = _end - va;
859 if ((pa + len) > real_avail_end)
860 _end -= (pa + len - real_avail_end);
861 assert((start - gVirtBase + gPhysBase - pa_offset) >= gPhysBase);
862
863 /* Round up to the nearest PAGE_SIZE boundary when creating mappings:
864 * PAGE_SIZE may be a multiple of ARM_PGBYTES, and we don't want to leave
865 * a ragged non-PAGE_SIZE-aligned edge. */
866 vm_offset_t rounded_end = round_page(_end);
5ba3f43e
A
867 /* Apply the desired protections to the specified page range */
868 for (i = 0; i <= (ARM_TT_L3_INDEX_MASK>>ARM_TT_L3_SHIFT); i++) {
d9a64523 869 if ((start <= va) && (va < rounded_end)) {
5ba3f43e
A
870
871 ptmp = pa | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE;
872 ptmp = ptmp | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
873 ptmp = ptmp | ARM_PTE_AP(pte_prot_APX);
874 ptmp = ptmp | ARM_PTE_NX;
5c9f4661
A
875#if __ARM_KERNEL_PROTECT__
876 ptmp = ptmp | ARM_PTE_NG;
877#endif /* __ARM_KERNEL_PROTECT__ */
5ba3f43e
A
878
879 if (pte_prot_XN) {
880 ptmp = ptmp | ARM_PTE_PNX;
881 }
882
883 /*
884 * If we can, apply the contiguous hint to this range. The hint is
d9a64523 885 * applicable if the current address falls within a hint-sized range that will
5ba3f43e
A
886 * be fully covered by this mapping request.
887 */
d9a64523
A
888 if ((va >= round_up_pte_hint_address(start)) && (round_up_pte_hint_address(va + 1) <= _end) &&
889 !force_page_granule && use_contiguous_hint) {
890 assert((va & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1)) == ((pa & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1))));
5ba3f43e
A
891 ptmp |= ARM_PTE_HINT;
892 }
d9a64523
A
893 /*
894 * Do not change the contiguous bit on an active mapping. Even in a single-threaded
895 * environment, it's possible for prefetch to produce a TLB conflict by trying to pull in
896 * a hint-sized entry on top of one or more existing page-sized entries. It's also useful
897 * to make sure we're not trying to unhint a sub-range of a larger hinted range, which
898 * could produce a later TLB conflict.
899 */
900 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT) || ((ppte[i] & ARM_PTE_HINT) == (ptmp & ARM_PTE_HINT)));
901
902 /*
903 * If we reach an entry that maps the current pte page, delay updating it until the very end.
904 * Otherwise we might end up making the PTE page read-only, leading to a fault later on in
905 * this function if we manage to outrun the TLB. This can happen on KTRR-enabled devices when
906 * marking segDATACONST read-only. Mappings for this region may straddle a PT page boundary,
907 * so we must also defer assignment of the following PTE. We will assume that if the region
908 * were to require one or more full L3 pages, it would instead use L2 blocks where possible,
909 * therefore only requiring at most one L3 page at the beginning and one at the end.
910 */
911 if (kva_active && ((pt_entry_t*)(phystokv(pa)) == ppte)) {
5ba3f43e 912 assert(recursive_pte == NULL);
d9a64523 913 assert(!force_page_granule);
5ba3f43e
A
914 recursive_pte = &ppte[i];
915 recursive_ptmp = ptmp;
916 } else if ((deferred_pte != NULL) && (&ppte[i] == &recursive_pte[1])) {
917 assert(*deferred_pte == NULL);
918 assert(deferred_ptmp != NULL);
919 *deferred_pte = &ppte[i];
920 *deferred_ptmp = ptmp;
921 } else {
922 ppte[i] = ptmp;
923 }
924 }
925
926 va += ARM_PGBYTES;
927 pa += ARM_PGBYTES;
928 }
929 if (recursive_pte != NULL)
930 *recursive_pte = recursive_ptmp;
931 }
932}
933
934/*
935 * arm_vm_page_granular_prot updates protections by iterating over the L2 entries and
936 * changing them. If a particular chunk necessitates L3 entries (for reasons of
937 * alignment or length, or an explicit request that the entry be fully expanded), we
938 * hand off to arm_vm_page_granular_helper to deal with the L3 chunk of the logic.
5ba3f43e
A
939 */
940static void
d9a64523
A
941arm_vm_page_granular_prot(vm_offset_t start, unsigned long size, pmap_paddr_t pa_offset,
942 int tte_prot_XN, int pte_prot_APX, int pte_prot_XN,
943 bool force_page_granule)
5ba3f43e
A
944{
945 pt_entry_t *deferred_pte = NULL, deferred_ptmp = 0;
946 vm_offset_t _end = start + size;
947 vm_offset_t align_start = (start + ARM_TT_L2_OFFMASK) & ~ARM_TT_L2_OFFMASK;
948
949 if (size == 0x0UL)
950 return;
951
952 if (align_start > _end) {
d9a64523 953 arm_vm_page_granular_helper(start, _end, start, pa_offset, pte_prot_APX, pte_prot_XN, force_page_granule, NULL, NULL);
5ba3f43e
A
954 return;
955 }
956
d9a64523 957 arm_vm_page_granular_helper(start, align_start, start, pa_offset, pte_prot_APX, pte_prot_XN, force_page_granule, &deferred_pte, &deferred_ptmp);
5ba3f43e 958
d9a64523
A
959 while ((_end - align_start) >= ARM_TT_L2_SIZE) {
960 if (force_page_granule)
961 arm_vm_page_granular_helper(align_start, align_start+ARM_TT_L2_SIZE, align_start + 1, pa_offset,
962 pte_prot_APX, pte_prot_XN, force_page_granule, NULL, NULL);
5ba3f43e 963 else {
d9a64523
A
964 pmap_paddr_t pa = align_start - gVirtBase + gPhysBase - pa_offset;
965 assert((pa & ARM_TT_L2_OFFMASK) == 0);
5ba3f43e 966 tt_entry_t *tte2;
5ba3f43e
A
967 tt_entry_t tmplate;
968
d9a64523 969 tte2 = arm_kva_to_tte(align_start);
5ba3f43e 970
d9a64523
A
971 if ((pa >= gPhysBase) && (pa < real_avail_end)) {
972 tmplate = (pa & ARM_TTE_BLOCK_L2_MASK) | ARM_TTE_TYPE_BLOCK
973 | ARM_TTE_VALID | ARM_TTE_BLOCK_AF | ARM_TTE_BLOCK_NX
974 | ARM_TTE_BLOCK_AP(pte_prot_APX) | ARM_TTE_BLOCK_SH(SH_OUTER_MEMORY)
975 | ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
976
5c9f4661 977#if __ARM_KERNEL_PROTECT__
d9a64523 978 tmplate = tmplate | ARM_TTE_BLOCK_NG;
5c9f4661 979#endif /* __ARM_KERNEL_PROTECT__ */
d9a64523
A
980 if (tte_prot_XN)
981 tmplate = tmplate | ARM_TTE_BLOCK_PNX;
5ba3f43e 982
d9a64523
A
983 *tte2 = tmplate;
984 }
5ba3f43e
A
985 }
986 align_start += ARM_TT_L2_SIZE;
987 }
988
989 if (align_start < _end)
d9a64523 990 arm_vm_page_granular_helper(align_start, _end, _end, pa_offset, pte_prot_APX, pte_prot_XN, force_page_granule, &deferred_pte, &deferred_ptmp);
5ba3f43e
A
991
992 if (deferred_pte != NULL)
993 *deferred_pte = deferred_ptmp;
994}
995
996static inline void
d9a64523 997arm_vm_page_granular_RNX(vm_offset_t start, unsigned long size, bool force_page_granule)
5ba3f43e 998{
d9a64523 999 arm_vm_page_granular_prot(start, size, 0, 1, AP_RONA, 1, force_page_granule);
5ba3f43e
A
1000}
1001
1002static inline void
d9a64523 1003arm_vm_page_granular_ROX(vm_offset_t start, unsigned long size, bool force_page_granule)
5ba3f43e 1004{
d9a64523 1005 arm_vm_page_granular_prot(start, size, 0, 0, AP_RONA, 0, force_page_granule);
5ba3f43e
A
1006}
1007
1008static inline void
d9a64523 1009arm_vm_page_granular_RWNX(vm_offset_t start, unsigned long size, bool force_page_granule)
5ba3f43e 1010{
d9a64523 1011 arm_vm_page_granular_prot(start, size, 0, 1, AP_RWNA, 1, force_page_granule);
5ba3f43e
A
1012}
1013
1014static inline void
d9a64523 1015arm_vm_page_granular_RWX(vm_offset_t start, unsigned long size, bool force_page_granule)
5ba3f43e 1016{
d9a64523 1017 arm_vm_page_granular_prot(start, size, 0, 0, AP_RWNA, 0, force_page_granule);
5ba3f43e
A
1018}
1019
d9a64523
A
1020/* used in the chosen/memory-map node, populated by iBoot. */
1021typedef struct MemoryMapFileInfo {
1022 vm_offset_t paddr;
1023 size_t length;
1024} MemoryMapFileInfo;
1025
1026
5ba3f43e
A
1027void
1028arm_vm_prot_init(boot_args * args)
1029{
5ba3f43e 1030
d9a64523
A
1031 segLOWESTTEXT = UINT64_MAX;
1032 if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) segLOWESTTEXT = segPRELINKTEXTB;
1033 assert(segSizeTEXT);
1034 if (segTEXTB < segLOWESTTEXT) segLOWESTTEXT = segTEXTB;
1035 assert(segLOWESTTEXT < UINT64_MAX);
1036
1037 segEXTRADATA = segLOWESTTEXT;
1038 segSizeEXTRADATA = 0;
1039
1040 DTEntry memory_map;
1041 MemoryMapFileInfo *trustCacheRange;
1042 unsigned int trustCacheRangeSize;
1043 int err;
1044
1045 err = DTLookupEntry(NULL, "chosen/memory-map", &memory_map);
1046 assert(err == kSuccess);
1047
1048 err = DTGetProperty(memory_map, "TrustCache", (void**)&trustCacheRange, &trustCacheRangeSize);
1049 if (err == kSuccess) {
1050 assert(trustCacheRangeSize == sizeof(MemoryMapFileInfo));
1051
1052 segEXTRADATA = phystokv(trustCacheRange->paddr);
1053 segSizeEXTRADATA = trustCacheRange->length;
1054
1055 arm_vm_page_granular_RNX(segEXTRADATA, segSizeEXTRADATA, FALSE);
1056 }
5ba3f43e
A
1057
1058 /* Map coalesced kext TEXT segment RWNX for now */
1059 arm_vm_page_granular_RWNX(segPRELINKTEXTB, segSizePRELINKTEXT, FALSE); // Refined in OSKext::readPrelinkedExtensions
1060
1061 /* Map coalesced kext DATA_CONST segment RWNX (could be empty) */
1062 arm_vm_page_granular_RWNX(segPLKDATACONSTB, segSizePLKDATACONST, FALSE); // Refined in OSKext::readPrelinkedExtensions
1063
1064 /* Map coalesced kext TEXT_EXEC segment RWX (could be empty) */
1065 arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, FALSE); // Refined in OSKext::readPrelinkedExtensions
1066
1067 /* if new segments not present, set space between PRELINK_TEXT and xnu TEXT to RWNX
d9a64523 1068 * otherwise we no longer expect any space between the coalesced kext read only segments and xnu rosegments
5ba3f43e
A
1069 */
1070 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
d9a64523
A
1071 if (segSizePRELINKTEXT)
1072 arm_vm_page_granular_RWNX(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT), FALSE);
5ba3f43e
A
1073 } else {
1074 /*
1075 * If we have the new segments, we should still protect the gap between kext
1076 * read-only pages and kernel read-only pages, in the event that this gap
1077 * exists.
1078 */
1079 if ((segPLKDATACONSTB + segSizePLKDATACONST) < segTEXTB) {
1080 arm_vm_page_granular_RWNX(segPLKDATACONSTB + segSizePLKDATACONST, segTEXTB - (segPLKDATACONSTB + segSizePLKDATACONST), FALSE);
1081 }
1082 }
1083
1084 /*
1085 * Protection on kernel text is loose here to allow shenanigans early on. These
1086 * protections are tightened in arm_vm_prot_finalize(). This is necessary because
1087 * we currently patch LowResetVectorBase in cpu.c.
1088 *
1089 * TEXT segment contains mach headers and other non-executable data. This will become RONX later.
1090 */
1091 arm_vm_page_granular_RNX(segTEXTB, segSizeTEXT, FALSE);
1092
1093 /* Can DATACONST start out and stay RNX?
1094 * NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
1095 * Make RNX in prot_finalize
1096 */
1097 arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, FALSE);
1098
1099 /* TEXTEXEC contains read only executable code: becomes ROX in prot_finalize */
1100 arm_vm_page_granular_RWX(segTEXTEXECB, segSizeTEXTEXEC, FALSE);
1101
1102
1103 /* DATA segment will remain RWNX */
1104 arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, FALSE);
1105
d9a64523
A
1106 arm_vm_page_granular_RWNX(segBOOTDATAB, segSizeBOOTDATA, TRUE);
1107 arm_vm_page_granular_RNX((vm_offset_t)&intstack_low_guard, PAGE_MAX_SIZE, TRUE);
1108 arm_vm_page_granular_RNX((vm_offset_t)&intstack_high_guard, PAGE_MAX_SIZE, TRUE);
1109 arm_vm_page_granular_RNX((vm_offset_t)&excepstack_high_guard, PAGE_MAX_SIZE, TRUE);
1110
5ba3f43e
A
1111 arm_vm_page_granular_ROX(segKLDB, segSizeKLD, FALSE);
1112 arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, FALSE);
d9a64523 1113 arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, FALSE); // Coalesced kext LINKEDIT segment
5ba3f43e
A
1114 arm_vm_page_granular_ROX(segLASTB, segSizeLAST, FALSE); // __LAST may be empty, but we cannot assume this
1115
1116 arm_vm_page_granular_RWNX(segPRELINKDATAB, segSizePRELINKDATA, FALSE); // Prelink __DATA for kexts (RW data)
1117
1118 if (segSizePLKLLVMCOV > 0)
1119 arm_vm_page_granular_RWNX(segPLKLLVMCOVB, segSizePLKLLVMCOV, FALSE); // LLVM code coverage data
1120
5ba3f43e 1121 arm_vm_page_granular_RWNX(segPRELINKINFOB, segSizePRELINKINFO, FALSE); /* PreLinkInfoDictionary */
5ba3f43e 1122
d9a64523
A
1123 arm_vm_page_granular_RNX(phystokv(args->topOfKernelData), BOOTSTRAP_TABLE_SIZE, FALSE); // Boot page tables; they should not be mutable.
1124}
5ba3f43e 1125
d9a64523
A
1126/*
1127 * return < 0 for a < b
1128 * 0 for a == b
1129 * > 0 for a > b
1130 */
1131typedef int (*cmpfunc_t)(const void *a, const void *b);
1132
1133extern void
1134qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
1135
1136static int
1137cmp_ptov_entries(const void *a, const void *b)
1138{
1139 const ptov_table_entry *entry_a = a;
1140 const ptov_table_entry *entry_b = b;
1141 // Sort in descending order of segment length
1142 if (entry_a->len < entry_b->len)
1143 return 1;
1144 else if (entry_a->len > entry_b->len)
1145 return -1;
1146 else
1147 return 0;
1148}
1149
1150SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;
1151
1152#define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))
1153
1154static void
1155arm_vm_physmap_slide(ptov_table_entry *temp_ptov_table, vm_map_address_t physmap_base, vm_map_address_t orig_va, vm_size_t len, int pte_prot_APX, boolean_t force_page_granule)
1156{
1157 pmap_paddr_t pa_offset;
1158
1159 assert(ptov_index < PTOV_TABLE_SIZE);
1160 assert((orig_va & ARM_PGMASK) == 0);
1161 temp_ptov_table[ptov_index].pa = orig_va - gVirtBase + gPhysBase;
1162 if (ptov_index == 0)
1163 temp_ptov_table[ptov_index].va = physmap_base;
1164 else
1165 temp_ptov_table[ptov_index].va = temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len;
1166 if (!force_page_granule) {
1167 vm_map_address_t orig_offset = temp_ptov_table[ptov_index].pa & ARM_TT_TWIG_OFFMASK;
1168 vm_map_address_t new_offset = temp_ptov_table[ptov_index].va & ARM_TT_TWIG_OFFMASK;
1169 if (new_offset < orig_offset)
1170 temp_ptov_table[ptov_index].va += (orig_offset - new_offset);
1171 else if (new_offset > orig_offset)
1172 temp_ptov_table[ptov_index].va = ROUND_TWIG(temp_ptov_table[ptov_index].va) + orig_offset;
1173 }
1174 assert((temp_ptov_table[ptov_index].va & ARM_PGMASK) == 0);
1175 temp_ptov_table[ptov_index].len = round_page(len);
1176 pa_offset = temp_ptov_table[ptov_index].va - orig_va;
1177 arm_vm_page_granular_prot(temp_ptov_table[ptov_index].va, temp_ptov_table[ptov_index].len, pa_offset, 1, pte_prot_APX, 1, force_page_granule);
1178 ++ptov_index;
5ba3f43e
A
1179}
1180
d9a64523
A
1181
1182static void
1183arm_vm_physmap_init(boot_args *args, vm_map_address_t physmap_base, vm_map_address_t dynamic_memory_begin __unused)
1184{
1185 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1186 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1187
1188 // Will be handed back to VM layer through ml_static_mfree() in arm_vm_prot_finalize()
1189 arm_vm_physmap_slide(temp_ptov_table, physmap_base, gVirtBase, segEXTRADATA - gVirtBase, AP_RWNA, FALSE);
1190
1191 arm_vm_page_granular_RWNX(end_kern, phystokv(args->topOfKernelData) - end_kern, FALSE); /* Device Tree, RAM Disk (if present), bootArgs */
1192
1193 arm_vm_physmap_slide(temp_ptov_table, physmap_base, (args->topOfKernelData + BOOTSTRAP_TABLE_SIZE - gPhysBase + gVirtBase),
1194 real_avail_end - (args->topOfKernelData + BOOTSTRAP_TABLE_SIZE), AP_RWNA, FALSE); // rest of physmem
1195
1196 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= dynamic_memory_begin);
1197
1198 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1199 // segments should be placed earliest in the table to optimize lookup performance.
1200 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1201
1202 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1203}
1204
1205
5ba3f43e 1206void
d9a64523 1207arm_vm_prot_finalize(boot_args * args __unused)
5ba3f43e 1208{
5ba3f43e
A
1209 /*
1210 * At this point, we are far enough along in the boot process that it will be
1211 * safe to free up all of the memory preceeding the kernel. It may in fact
1212 * be safe to do this earlier.
1213 *
1214 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
1215 * as usable.
1216 */
1217
1218 /*
1219 * if old style PRELINK segment exists, free memory before it, and after it before XNU text
1220 * otherwise we're dealing with a new style kernel cache, so we should just free the
1221 * memory before PRELINK_TEXT segment, since the rest of the KEXT read only data segments
1222 * should be immediately followed by XNU's TEXT segment
1223 */
1224
d9a64523 1225 ml_static_mfree(phystokv(gPhysBase), segEXTRADATA - gVirtBase);
5ba3f43e 1226
d9a64523
A
1227 /*
1228 * KTRR support means we will be mucking with these pages and trying to
1229 * protect them; we cannot free the pages to the VM if we do this.
1230 */
1231 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
5ba3f43e
A
1232 /* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
1233 ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
1234 }
1235
1236 /*
1237 * LowResetVectorBase patching should be done by now, so tighten executable
1238 * protections.
1239 */
1240 arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, FALSE);
1241
1242 /* tighten permissions on kext read only data and code */
1243 if (segSizePLKDATACONST && segSizePLKTEXTEXEC) {
1244 arm_vm_page_granular_RNX(segPRELINKTEXTB, segSizePRELINKTEXT, FALSE);
1245 arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, FALSE);
1246 arm_vm_page_granular_RNX(segPLKDATACONSTB, segSizePLKDATACONST, FALSE);
1247 }
1248
d9a64523
A
1249 cpu_stack_alloc(&BootCpuData);
1250 arm64_replace_bootstack(&BootCpuData);
1251 ml_static_mfree(phystokv(segBOOTDATAB - gVirtBase + gPhysBase), segSizeBOOTDATA);
1252
5c9f4661
A
1253#if __ARM_KERNEL_PROTECT__
1254 arm_vm_populate_kernel_el0_mappings();
1255#endif /* __ARM_KERNEL_PROTECT__ */
1256
d9a64523 1257
5ba3f43e
A
1258#if defined(KERNEL_INTEGRITY_KTRR)
1259 /*
1260 * __LAST,__pinst should no longer be executable.
1261 */
1262 arm_vm_page_granular_RNX(segLASTB, segSizeLAST, FALSE);
1263
1264 /*
1265 * Must wait until all other region permissions are set before locking down DATA_CONST
1266 * as the kernel static page tables live in DATA_CONST on KTRR enabled systems
1267 * and will become immutable.
1268 */
1269#endif
5c9f4661 1270
5ba3f43e
A
1271 arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, FALSE);
1272
1273#ifndef __ARM_L1_PTW__
1274 FlushPoC_Dcache();
1275#endif
d9a64523 1276 __builtin_arm_dsb(DSB_ISH);
5ba3f43e
A
1277 flush_mmu_tlb();
1278}
1279
1280#define TBI_USER 0x1
1281#define TBI_KERNEL 0x2
1282
1283boolean_t user_tbi = TRUE;
1284
1285/*
1286 * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of
1287 * address accesses. It can be enabled separately for TTBR0 (user) and
1288 * TTBR1 (kernel). We enable it by default for user only, but allow both
1289 * to be controlled by the 'tbi' boot-arg.
1290 */
1291static void
1292set_tbi(void)
1293{
5c9f4661
A
1294#if !__ARM_KERNEL_PROTECT__
1295 /* If we are not built with __ARM_KERNEL_PROTECT__, TBI can be turned
1296 * off with a boot-arg.
1297 */
5ba3f43e
A
1298 uint64_t old_tcr, new_tcr;
1299 int tbi = 0;
1300
1301 if (PE_parse_boot_argn("tbi", &tbi, sizeof(tbi)))
1302 user_tbi = ((tbi & TBI_USER) == TBI_USER);
1303 old_tcr = new_tcr = get_tcr();
1304 new_tcr |= (user_tbi) ? TCR_TBI0_TOPBYTE_IGNORED : 0;
1305 new_tcr |= (tbi & TBI_KERNEL) ? TCR_TBI1_TOPBYTE_IGNORED : 0;
1306
1307 if (old_tcr != new_tcr) {
1308 set_tcr(new_tcr);
1309 sysreg_restore.tcr_el1 = new_tcr;
1310 }
5c9f4661 1311#endif /* !__ARM_KERNEL_PROTECT__ */
5ba3f43e
A
1312}
1313
d9a64523
A
1314#define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
1315#define ARM64_PHYSMAP_SLIDE_MASK (ARM64_PHYSMAP_SLIDE_RANGE - 1)
1316
5ba3f43e
A
1317void
1318arm_vm_init(uint64_t memory_size, boot_args * args)
1319{
1320#if !__ARM64_TWO_LEVEL_PMAP__
1321 vm_map_address_t va_l1, va_l1_end;
5ba3f43e
A
1322 tt_entry_t *cpu_l1_tte;
1323#else
1324 /*
1325 * If we are using two level page tables, rather than the
1326 * 3 level page tables that xnu defaults to for ARM64,
1327 * then a great deal of the code in this path becomes
1328 * redundant. As a result, most of the logic having to
1329 * do with L1 pages will be excluded from such
1330 * configurations in this function.
1331 */
1332#endif
1333 vm_map_address_t va_l2, va_l2_end;
5ba3f43e
A
1334 tt_entry_t *cpu_l2_tte;
1335 pmap_paddr_t boot_ttep;
1336 tt_entry_t *boot_tte;
1337 uint64_t mem_segments;
1338 vm_offset_t ptpage_vaddr;
d9a64523
A
1339 vm_map_address_t dynamic_memory_begin;
1340 vm_map_address_t physmap_base;
5ba3f43e
A
1341
1342
1343 /*
1344 * Get the virtual and physical memory base from boot_args.
1345 */
1346 gVirtBase = args->virtBase;
1347 gPhysBase = args->physBase;
1348 gPhysSize = args->memSize;
1349 mem_size = args->memSize;
1350 if ((memory_size != 0) && (mem_size > memory_size))
1351 mem_size = memory_size;
d9a64523
A
1352 if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 4))
1353 panic("Unsupported memory configuration %lx\n", mem_size);
1354
1355 physmap_base = phystokv(args->topOfKernelData) + BOOTSTRAP_TABLE_SIZE;
1356
1357 // Slide the physical aperture to a random page-aligned location within the slide range
1358 uint64_t physmap_slide = early_random() & ARM64_PHYSMAP_SLIDE_MASK & ~((uint64_t)PAGE_MASK);
1359 assert(physmap_slide < ARM64_PHYSMAP_SLIDE_RANGE);
1360
1361 physmap_base += physmap_slide;
1362
1363 static_memory_end = physmap_base + mem_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE); // worst possible case for block alignment
1364#if KASAN
1365 /* add the KASAN stolen memory to the physmap */
1366 dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
1367#else
1368 dynamic_memory_begin = static_memory_end;
1369#endif
1370 if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS)
1371 panic("Unsupported memory configuration %lx\n", mem_size);
5ba3f43e
A
1372
1373 boot_ttep = args->topOfKernelData;
1374 boot_tte = (tt_entry_t *) phystokv(boot_ttep);
1375
d9a64523
A
1376#if DEVELOPMENT || DEBUG
1377 /* Sanity check - assert that BOOTSTRAP_TABLE_SIZE is sufficiently-large to
1378 * hold our bootstrap mappings for any possible slide */
1379 size_t bytes_mapped = dynamic_memory_begin - gVirtBase;
1380 size_t l1_entries = 1 + ((bytes_mapped + ARM_TT_L1_SIZE - 1) / ARM_TT_L1_SIZE);
1381 /* 1 L1 each for V=P and KVA, plus 1 page for each L2 */
1382 size_t pages_used = 2 * (l1_entries + 1);
1383 if (pages_used > BOOTSTRAP_TABLE_SIZE) {
1384 panic("BOOTSTRAP_TABLE_SIZE too small for memory config\n");
1385 }
1386#endif
1387
1388 /*
5ba3f43e
A
1389 * TTBR0 L1, TTBR0 L2 - 1:1 bootstrap mapping.
1390 * TTBR1 L1, TTBR1 L2 - kernel mapping
1391 */
d9a64523 1392 avail_start = boot_ttep + BOOTSTRAP_TABLE_SIZE;
5ba3f43e
A
1393
1394#if defined(KERNEL_INTEGRITY_KTRR)
1395 arm_replace_identity_map(args);
1396#endif
1397
1398 /* Initialize invalid tte page */
1399 invalid_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1400 invalid_ttep = kvtophys((vm_offset_t)invalid_tte);
1401 bzero(invalid_tte, ARM_PGBYTES);
1402
1403 /*
1404 * Initialize l1 page table page
1405 */
1406#if __ARM64_TWO_LEVEL_PMAP__
1407 /*
1408 * If we're using a two level page table, we still need to
1409 * set the cpu_ttep to avail_start, as this will be the root
1410 * of our page table regardless of how many levels we are
1411 * using.
1412 */
1413#endif
1414 cpu_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1415 cpu_ttep = kvtophys((vm_offset_t)cpu_tte);
1416 bzero(cpu_tte, ARM_PGBYTES);
5ba3f43e
A
1417 avail_end = gPhysBase + mem_size;
1418
d9a64523
A
1419#if KASAN
1420 real_avail_end = avail_end + (shadow_ptop - shadow_pbase);
1421#else
1422 real_avail_end = avail_end;
1423#endif
1424
5ba3f43e
A
1425 /*
1426 * Initialize l1 and l2 page table pages :
1427 * map physical memory at the kernel base virtual address
1428 * cover the kernel dynamic address range section
1429 *
1430 * the so called physical aperture should be statically mapped
1431 */
5ba3f43e 1432#if !__ARM64_TWO_LEVEL_PMAP__
5ba3f43e 1433 va_l1 = gVirtBase;
d9a64523 1434 va_l1_end = dynamic_memory_begin;
5ba3f43e
A
1435 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1436
1437 while (va_l1 < va_l1_end) {
d9a64523
A
1438 if (*cpu_l1_tte == ARM_TTE_EMPTY) {
1439 /* Allocate a page and setup L1 Table TTE in L1 */
1440 ptpage_vaddr = alloc_ptpage(TRUE);
1441 *cpu_l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
1442 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1443 }
5ba3f43e 1444
d9a64523 1445 if ((va_l1 + ARM_TT_L1_SIZE) < va_l1) {
5ba3f43e 1446 /* If this is the last L1 entry, it must cover the last mapping. */
d9a64523 1447 break;
5ba3f43e
A
1448 }
1449
d9a64523 1450 va_l1 += ARM_TT_L1_SIZE;
5ba3f43e 1451 cpu_l1_tte++;
5ba3f43e
A
1452 }
1453#endif
1454
5c9f4661
A
1455#if __ARM_KERNEL_PROTECT__
1456 /* Expand the page tables to prepare for the EL0 mappings. */
1457 arm_vm_expand_kernel_el0_mappings();
1458#endif /* __ARM_KERNEL_PROTECT__ */
1459
5ba3f43e
A
1460 /*
1461 * Now retrieve addresses for end, edata, and etext from MACH-O headers
1462 */
1463 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
1464 segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
1465 segPLKTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
1466 segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
1467 segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
1468 segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
1469 segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
d9a64523
A
1470
1471 segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
5ba3f43e
A
1472 segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
1473 segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
1474 segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
1475 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
1476 segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
1477 segPLKLINKEDITB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
1478 segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
1479
1480 (void) PE_parse_boot_argn("use_contiguous_hint", &use_contiguous_hint, sizeof(use_contiguous_hint));
1481 assert(segSizePRELINKTEXT < 0x03000000); /* 23355738 */
1482
1483 /* if one of the new segments is present, the other one better be as well */
1484 if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
1485 assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
1486 }
1487
1488 etext = (vm_offset_t) segTEXTB + segSizeTEXT;
1489 sdata = (vm_offset_t) segDATAB;
1490 edata = (vm_offset_t) segDATAB + segSizeDATA;
1491 end_kern = round_page(getlastaddr()); /* Force end to next page */
1492
1493 vm_set_page_size();
1494
1495 vm_kernel_base = segTEXTB;
1496 vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
1497 vm_kext_base = segPRELINKTEXTB;
1498 vm_kext_top = vm_kext_base + segSizePRELINKTEXT;
1499
1500 vm_prelink_stext = segPRELINKTEXTB;
1501 if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
1502 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
1503 } else {
1504 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
1505 }
1506 vm_prelink_sinfo = segPRELINKINFOB;
1507 vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
1508 vm_slinkedit = segLINKB;
1509 vm_elinkedit = segLINKB + segSizeLINK;
1510
1511 vm_prelink_sdata = segPRELINKDATAB;
1512 vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;
1513
1514 arm_vm_prot_init(args);
1515
1516
1517 /*
1518 * Initialize the page tables for the low globals:
1519 * cover this address range:
1520 * LOW_GLOBAL_BASE_ADDRESS + 2MB
1521 */
1522#if __ARM64_TWO_LEVEL_PMAP__
1523 va_l2 = LOW_GLOBAL_BASE_ADDRESS;
1524 cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1525#else
1526 va_l1 = va_l2 = LOW_GLOBAL_BASE_ADDRESS;
1527 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1528 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1529#endif
1530 ptpage_vaddr = alloc_ptpage(TRUE);
1531 *cpu_l2_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
1532 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1533
1534 /*
1535 * Initialize l2 page table pages :
1536 * cover this address range:
1537 * KERNEL_DYNAMIC_ADDR - VM_MAX_KERNEL_ADDRESS
1538 */
1539#if !__ARM64_TWO_LEVEL_PMAP__
d9a64523 1540 va_l1 = dynamic_memory_begin;
5c9f4661 1541 va_l1_end = VM_MAX_KERNEL_ADDRESS;
5ba3f43e
A
1542 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1543
1544 while (va_l1 < va_l1_end) {
1545 if (*cpu_l1_tte == ARM_TTE_EMPTY) {
1546 /* Allocate a page and setup L1 Table TTE in L1 */
1547 ptpage_vaddr = alloc_ptpage(TRUE);
d9a64523 1548 *cpu_l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
5ba3f43e
A
1549 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1550 }
1551
1552 if ((va_l1 + ARM_TT_L1_SIZE) < va_l1) {
1553 /* If this is the last L1 entry, it must cover the last mapping. */
1554 break;
1555 }
1556
1557 va_l1 += ARM_TT_L1_SIZE;
1558 cpu_l1_tte++;
1559 }
1560#endif
1561
1562#if KASAN
d9a64523
A
1563 /* record the extent of the physmap */
1564 physmap_vbase = physmap_base;
1565 physmap_vtop = static_memory_end;
5ba3f43e
A
1566 kasan_init();
1567#endif
1568
d9a64523 1569 set_tbi();
5ba3f43e 1570 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
d9a64523
A
1571
1572 arm_vm_physmap_init(args, physmap_base, dynamic_memory_begin);
5ba3f43e 1573 set_mmu_ttb_alternate(cpu_ttep & TTBR_BADDR_MASK);
5ba3f43e 1574 flush_mmu_tlb();
d9a64523
A
1575 kva_active = TRUE;
1576 // global table pointers may need to be different due to physical aperture remapping
1577 cpu_tte = (tt_entry_t*)(phystokv(cpu_ttep));
1578 invalid_tte = (tt_entry_t*)(phystokv(invalid_ttep));
5ba3f43e 1579
5ba3f43e
A
1580 sane_size = mem_size - (avail_start - gPhysBase);
1581 max_mem = mem_size;
d9a64523 1582 vm_kernel_slid_base = segLOWESTTEXT;
5ba3f43e 1583 vm_kernel_slid_top = vm_prelink_einfo;
d9a64523 1584 vm_kernel_slide = segTEXTB-VM_KERNEL_LINK_ADDRESS;
5ba3f43e
A
1585 vm_kernel_stext = segTEXTB;
1586 assert(segDATACONSTB == segTEXTB + segSizeTEXT);
d9a64523 1587 assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
5ba3f43e
A
1588 vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
1589
d9a64523
A
1590 dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);
1591 pmap_bootstrap(dynamic_memory_begin);
1592
1593 disable_preemption();
5ba3f43e
A
1594
1595 /*
1596 * Initialize l3 page table pages :
1597 * cover this address range:
1598 * 2MB + FrameBuffer size + 10MB for each 256MB segment
1599 */
1600
1601 mem_segments = (mem_size + 0x0FFFFFFF) >> 28;
1602
1603#if !__ARM64_TWO_LEVEL_PMAP__
d9a64523 1604 va_l1 = dynamic_memory_begin;
5ba3f43e
A
1605 va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
1606 va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
1607 va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
1608
1609 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1610
1611 while (va_l1 < va_l1_end) {
1612
1613 va_l2 = va_l1;
1614
1615 if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) {
1616 /* If this is the last L1 entry, it must cover the last mapping. */
1617 va_l2_end = va_l1_end;
1618 } else {
1619 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end);
1620 }
1621
1622 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1623#else
d9a64523 1624 va_l2 = dynamic_memory_begin;
5ba3f43e
A
1625 va_l2_end = va_l2 + ((2 + (mem_segments * 10)) << 20);
1626 va_l2_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
1627 va_l2_end = (va_l2_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
1628 cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1629#endif
1630
1631 while (va_l2 < va_l2_end) {
1632 pt_entry_t * ptp;
1633 pmap_paddr_t ptp_phys;
1634
1635 /* Allocate a page and setup L3 Table TTE in L2 */
1636 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
1637 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
1638
1639 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
1640
d9a64523 1641 *cpu_l2_tte = (pa_to_tte (ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
5ba3f43e
A
1642
1643 va_l2 += ARM_TT_L2_SIZE;
1644 cpu_l2_tte++;
1645 };
1646#if !__ARM64_TWO_LEVEL_PMAP__
1647 va_l1 = va_l2_end;
1648 cpu_l1_tte++;
1649 }
1650#endif
1651
1652 /*
1653 * Initialize l3 page table pages :
1654 * cover this address range:
1655 * (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VM_MAX_KERNEL_ADDRESS
1656 */
1657#if !__ARM64_TWO_LEVEL_PMAP__
1658 va_l1 = VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK;
1659 va_l1_end = VM_MAX_KERNEL_ADDRESS;
1660
1661 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1662
1663 while (va_l1 < va_l1_end) {
1664
1665 va_l2 = va_l1;
1666
1667 if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) {
1668 /* If this is the last L1 entry, it must cover the last mapping. */
1669 va_l2_end = va_l1_end;
1670 } else {
1671 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end);
1672 }
1673
1674 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1675#else
1676 va_l2 = VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK;
1677 va_l2_end = VM_MAX_KERNEL_ADDRESS;
1678 cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1679#endif
1680
1681 while (va_l2 < va_l2_end) {
1682 pt_entry_t * ptp;
1683 pmap_paddr_t ptp_phys;
1684
1685 /* Allocate a page and setup L3 Table TTE in L2 */
1686 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
1687 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
1688
1689 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
1690
d9a64523 1691 *cpu_l2_tte = (pa_to_tte (ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
5ba3f43e
A
1692
1693 va_l2 += ARM_TT_L2_SIZE;
1694 cpu_l2_tte++;
1695 };
1696#if !__ARM64_TWO_LEVEL_PMAP__
1697 va_l1 = va_l2_end;
1698 cpu_l1_tte++;
1699 }
1700#endif
1701
1702#if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
1703 /*
1704 * In this configuration, the bootstrap mappings (arm_vm_init) and
1705 * the heap mappings occupy separate L1 regions. Explicitly set up
1706 * the heap L1 allocations here.
1707 */
1708 va_l1 = VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK;
1709 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1710
1711 while ((va_l1 >= (VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK)) && (va_l1 < VM_MAX_KERNEL_ADDRESS)) {
1712 /*
1713 * If the L1 entry has not yet been allocated, allocate it
1714 * now and treat it as a heap table.
1715 */
1716 if (*cpu_l1_tte == ARM_TTE_EMPTY) {
1717 tt_entry_t *new_tte = (tt_entry_t*)alloc_ptpage(FALSE);
1718 bzero(new_tte, ARM_PGBYTES);
d9a64523 1719 *cpu_l1_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
5ba3f43e
A
1720 }
1721
1722 cpu_l1_tte++;
1723 va_l1 += ARM_TT_L1_SIZE;
1724 }
1725#endif
1726
1727 /*
1728 * Adjust avail_start so that the range that the VM owns
1729 * starts on a PAGE_SIZE aligned boundary.
1730 */
1731 avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;
1732
5ba3f43e
A
1733 first_avail = avail_start;
1734 patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData);
d9a64523 1735 enable_preemption();
5ba3f43e
A
1736}
1737