/*
 * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_debug.h>

#include <mach/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <arm64/proc_reg.h>
#include <arm64/lowglobals.h>
#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#include <pexpert/arm64/boot.h>

#include <libkern/kernel_mach_header.h>
#include <libkern/section_keywords.h>

#if __ARM_KERNEL_PROTECT__
#include <arm/atomic.h>
#endif /* __ARM_KERNEL_PROTECT__ */
#if __ARM_KERNEL_PROTECT__
/*
 * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
 * mappable space preceding the kernel (as we unmap the kernel by cutting the
 * range covered by TTBR1 in half). This must also cover the exception vectors.
 */
static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);

/* The exception vectors and the kernel cannot share root TTEs. */
static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);

/*
 * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
 * the exception vectors.
 */
static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
#endif /* __ARM_KERNEL_PROTECT__ */
extern vm_offset_t shadow_pbase;
extern vm_offset_t shadow_ptop;
extern vm_offset_t physmap_vbase;
extern vm_offset_t physmap_vtop;

/*
 * Denotes the end of xnu.
 */
extern void *last_kernel_symbol;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;

SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;
/* Used by <mach/arm/vm_param.h> */
SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
/*
 * NOTE: mem_size is bogus on large memory machines.
 *       We will pin it to 0x80000000 if there is more than 2 GB.
 *       This is left only for compatibility and max_mem should be used.
 */
vm_offset_t mem_size;       /* Size of actual physical memory present
                             * minus any performance buffer and possibly
                             * limited by mem_limit in bytes */
uint64_t    mem_actual;     /* The "One True" physical memory size
                             * actually, it's the highest physical
                             * address + 1 */
uint64_t    max_mem;        /* Size of physical memory (bytes), adjusted
                             * by maxmem */
uint64_t    sane_size;      /* Memory size to use for defaults
                             * calculations */
/* This no longer appears to be used; kill it? */
addr64_t    vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
                                                   * virtual address known
                                                   * to the VM system */
SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTEXECB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXTEXEC;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segLINKB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLD;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segLASTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLAST;

SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;

SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;

SECURITY_READ_ONLY_LATE(unsigned) PAGE_SHIFT_CONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
SECURITY_READ_ONLY_LATE(vm_offset_t) edata;

vm_offset_t alloc_ptpage(boolean_t map_static);
SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
/*
 * Bootstrap the system enough to run with virtual memory.
 * Map the kernel's code and data, and allocate the system page table.
 * Page_size must already be set.
 *
 * first_avail: first available physical page -
 *              after kernel page tables
 * avail_start: PA of first physical page
 * avail_end:   PA of last physical page
 */
SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail;
SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end;
#if __ARM_KERNEL_PROTECT__
extern void ExceptionVectorsBase;
extern void ExceptionVectorsEnd;
#endif /* __ARM_KERNEL_PROTECT__ */

#define MEM_SIZE_MAX 0x100000000ULL
#if defined(KERNEL_INTEGRITY_KTRR)
#if __ARM64_TWO_LEVEL_PMAP__
/* We could support this configuration, but it adds memory overhead. */
#error This configuration is not supported
#endif
#endif

/*
 * This rounds the given address up to the nearest boundary for a PTE contiguous
 * hint.
 */
static vm_offset_t
round_up_pte_hint_address(vm_offset_t address)
{
	vm_offset_t hint_size = ARM_PTE_SIZE << ARM_PTE_HINT_ENTRIES_SHIFT;
	return ((address + (hint_size - 1)) & ~(hint_size - 1));
}
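/*
 * Worked example of the rounding above (the concrete hint_size is an assumed
 * value for illustration only; the real value follows from ARM_PTE_SIZE and
 * ARM_PTE_HINT_ENTRIES_SHIFT):
 *
 *   hint_size = 0x10000 (64 KB)
 *   round_up_pte_hint_address(0x1F004000) == 0x1F010000
 *   round_up_pte_hint_address(0x1F010000) == 0x1F010000
 *
 * The add-then-mask form works because hint_size is a power of two.
 */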
/*
 * Allocate a page for a page table: we support static and dynamic mappings.
 *
 * Returns a virtual address for the allocated page.
 *
 * For static mappings, we allocate from the region ropagetable_begin to ropagetable_end-1,
 * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
 *
 * For dynamic mappings, we allocate from avail_start, which should remain RWNX.
 */

vm_offset_t alloc_ptpage(boolean_t map_static) {
	vm_offset_t vaddr;

#if !(defined(KERNEL_INTEGRITY_KTRR))
	map_static = FALSE;
#endif

	if (!ropage_next) {
		ropage_next = (vm_offset_t)&ropagetable_begin;
	}

	if (map_static) {
		assert(ropage_next < (vm_offset_t)&ropagetable_end);

		vaddr = ropage_next;
		ropage_next += ARM_PGBYTES;

		return vaddr;
	} else {
		vaddr = phystokv(avail_start);
		avail_start += ARM_PGBYTES;

		return vaddr;
	}
}
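/*
 * Typical usage pattern for alloc_ptpage elsewhere in this file (a sketch of
 * the existing idiom, not an additional allocation path; new_tt and parent_tte
 * are placeholder names): allocate a static page table page, zero it, and wire
 * it into a parent table entry. The exact flag bits depend on the table level
 * being populated.
 *
 *   tt_entry_t *new_tt = (tt_entry_t *)alloc_ptpage(TRUE);
 *   bzero(new_tt, ARM_PGBYTES);
 *   *parent_tte = (kvtophys((vm_offset_t)new_tt) & ARM_TTE_TABLE_MASK)
 *                 | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
 */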
void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out);

void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out) {
	unsigned int i;
	boolean_t cur_ro, prev_ro = 0;
	int start_entry = -1;
	tt_entry_t cur, prev = 0;
	pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
	pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
	boolean_t tt_static = kvtophys((vm_offset_t)tt) >= robegin &&
	                      kvtophys((vm_offset_t)tt) < roend;

	for(i=0; i<TTE_PGENTRIES; i++) {
		int tte_type = tt[i] & ARM_TTE_TYPE_MASK;
		cur = tt[i] & ARM_TTE_TABLE_MASK;

		if (tt_static) {
			/* addresses mapped by this entry are static if it is a block mapping,
			 * or the table was allocated from the RO page table region */
			cur_ro = (tte_type == ARM_TTE_TYPE_BLOCK) || (cur >= robegin && cur < roend);
		} else {
			cur_ro = 0;
		}

		if ((cur == 0 && prev != 0) || (cur_ro != prev_ro && prev != 0)) { // falling edge
			uintptr_t start,end,sz;

			start = (uintptr_t)start_entry << ARM_TT_L2_SHIFT;
			start += tt_base;
			end = ((uintptr_t)i << ARM_TT_L2_SHIFT) - 1;
			end += tt_base;

			sz = end - start + 1;
			printf("%*s0x%08x_%08x-0x%08x_%08x %s (%luMB)\n",
			       indent*4, "",
			       (uint32_t)(start >> 32),(uint32_t)start,
			       (uint32_t)(end >> 32),(uint32_t)end,
			       prev_ro ? "Static " : "Dynamic",
			       (sz >> 20));

			if (prev_ro) {
				*rosz_out += sz;
			} else {
				*rwsz_out += sz;
			}
		}

		if ((prev == 0 && cur != 0) || cur_ro != prev_ro) { // rising edge: set start
			start_entry = i;
		}

		prev = cur;
		prev_ro = cur_ro;
	}
}
void dump_kva_space() {
	uint64_t tot_rosz=0, tot_rwsz=0;
	int ro_ptpages, rw_ptpages;
	pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
	pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
	boolean_t root_static = kvtophys((vm_offset_t)cpu_tte) >= robegin &&
	                        kvtophys((vm_offset_t)cpu_tte) < roend;
	uint64_t kva_base = ~((1ULL << (64 - T1SZ_BOOT)) - 1);

	printf("Root page table: %s\n", root_static ? "Static" : "Dynamic");

#if !__ARM64_TWO_LEVEL_PMAP__
	for(unsigned int i=0; i<TTE_PGENTRIES; i++) {
		pmap_paddr_t cur;
		boolean_t cur_ro;
		uintptr_t start,end;
		uint64_t rosz = 0, rwsz = 0;

		if ((cpu_tte[i] & ARM_TTE_VALID) == 0)
			continue;

		cur = cpu_tte[i] & ARM_TTE_TABLE_MASK;
		start = (uint64_t)i << ARM_TT_L1_SHIFT;
		start = start + kva_base;
		end = start + (ARM_TT_L1_SIZE - 1);
		cur_ro = cur >= robegin && cur < roend;

		printf("0x%08x_%08x-0x%08x_%08x %s\n",
		       (uint32_t)(start >> 32),(uint32_t)start,
		       (uint32_t)(end >> 32),(uint32_t)end,
		       cur_ro ? "Static " : "Dynamic");

		dump_kva_l2(start, (tt_entry_t*)phystokv(cur), 1, &rosz, &rwsz);
		tot_rosz += rosz;
		tot_rwsz += rwsz;
	}
#else
	dump_kva_l2(kva_base, cpu_tte, 0, &tot_rosz, &tot_rwsz);
#endif /* !__ARM64_TWO_LEVEL_PMAP__ */

	printf("L2 Address space mapped: Static %lluMB Dynamic %lluMB Total %lluMB\n",
	       tot_rosz >> 20,
	       tot_rwsz >> 20,
	       (tot_rosz >> 20) + (tot_rwsz >> 20));

	ro_ptpages = (int)((ropage_next - (vm_offset_t)&ropagetable_begin) >> ARM_PGSHIFT);
	rw_ptpages = (int)(lowGlo.lgStaticSize >> ARM_PGSHIFT);
	printf("Pages used: static %d dynamic %d\n", ro_ptpages, rw_ptpages);
}
#if __ARM_KERNEL_PROTECT__
/*
 * arm_vm_map:
 *   root_ttp: The kernel virtual address for the root of the target page tables
 *   vaddr: The target virtual address
 *   pte: A page table entry value (may be ARM_PTE_EMPTY)
 *
 * This function installs pte at vaddr in root_ttp. Any page table pages needed
 * to install pte will be allocated by this function.
 */
static void
arm_vm_map(tt_entry_t * root_ttp, vm_offset_t vaddr, pt_entry_t pte)
{
	vm_offset_t ptpage = 0;
	tt_entry_t * ttp = root_ttp;

#if !__ARM64_TWO_LEVEL_PMAP__
	tt_entry_t * l1_ttep = NULL;
	tt_entry_t l1_tte = 0;
#endif

	tt_entry_t * l2_ttep = NULL;
	tt_entry_t l2_tte = 0;
	pt_entry_t * ptep = NULL;
	pt_entry_t cpte = 0;

	/*
	 * Walk the target page table to find the PTE for the given virtual
	 * address. Allocate any page table pages needed to do this.
	 */
#if !__ARM64_TWO_LEVEL_PMAP__
	l1_ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
	l1_tte = *l1_ttep;

	if (l1_tte == ARM_TTE_EMPTY) {
		ptpage = alloc_ptpage(TRUE);
		bzero((void *)ptpage, ARM_PGBYTES);
		l1_tte = kvtophys(ptpage);
		l1_tte &= ARM_TTE_TABLE_MASK;
		l1_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
		*l1_ttep = l1_tte;
		ptpage = 0;
	}

	ttp = (tt_entry_t *)phystokv(l1_tte & ARM_TTE_TABLE_MASK);
#endif

	l2_ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
	l2_tte = *l2_ttep;

	if (l2_tte == ARM_TTE_EMPTY) {
		ptpage = alloc_ptpage(TRUE);
		bzero((void *)ptpage, ARM_PGBYTES);
		l2_tte = kvtophys(ptpage);
		l2_tte &= ARM_TTE_TABLE_MASK;
		l2_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
		*l2_ttep = l2_tte;
		ptpage = 0;
	}

	ttp = (tt_entry_t *)phystokv(l2_tte & ARM_TTE_TABLE_MASK);

	ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
	cpte = *ptep;

	/*
	 * If the existing PTE is not empty, then we are replacing a valid
	 * mapping.
	 */
	if (cpte != ARM_PTE_EMPTY) {
		panic("%s: cpte=%#llx is not empty, "
		      "vaddr=%#lx, pte=%#llx",
		      __func__, cpte,
		      vaddr, pte);
	}

	*ptep = pte;
}
/*
 * arm_vm_kernel_el0_map:
 *   vaddr: The target virtual address
 *   pte: A page table entry value (may be ARM_PTE_EMPTY)
 *
 * This function installs pte at vaddr for the EL0 kernel mappings.
 */
static void
arm_vm_kernel_el0_map(vm_offset_t vaddr, pt_entry_t pte)
{
	/* Calculate where vaddr will be in the EL1 kernel page tables. */
	vm_offset_t kernel_pmap_vaddr = vaddr - ((ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) / 2ULL);
	arm_vm_map(cpu_tte, kernel_pmap_vaddr, pte);
}
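/*
 * Sketch of the arithmetic above, with an assumed root span purely for
 * illustration: if ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE came to
 * 0x8000000000 (512 GB), then half of that is 0x4000000000 and
 *
 *   kernel_pmap_vaddr = vaddr - 0x4000000000
 *
 * so an address in the upper half of the TTBR1 range is aliased to the same
 * offset within the lower half, which is where the EL0 kernel mappings live
 * (compare the static_asserts at the top of this file).
 */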
/*
 * arm_vm_kernel_el1_map:
 *   vaddr: The target virtual address
 *   pte: A page table entry value (may be ARM_PTE_EMPTY)
 *
 * This function installs pte at vaddr for the EL1 kernel mappings.
 */
static void
arm_vm_kernel_el1_map(vm_offset_t vaddr, pt_entry_t pte) {
	arm_vm_map(cpu_tte, vaddr, pte);
}
/*
 * arm_vm_kernel_pte:
 *   vaddr: The target virtual address
 *
 * This function returns the PTE value for the given vaddr from the kernel page
 * tables. If the region has been block mapped, we return what an
 * equivalent PTE value would be (as regards permissions and flags). We also
 * remove the HINT bit (as we are not necessarily creating contiguous mappings).
 */
static pt_entry_t
arm_vm_kernel_pte(vm_offset_t vaddr)
{
	tt_entry_t * ttp = cpu_tte;
	tt_entry_t * ttep = NULL;
	tt_entry_t tte = 0;
	pt_entry_t * ptep = NULL;
	pt_entry_t pte = 0;

#if !__ARM64_TWO_LEVEL_PMAP__
	ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
	tte = *ttep;

	assert(tte & ARM_TTE_VALID);

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		/* This is a block mapping; return the equivalent PTE value. */
		pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
		pte |= ARM_PTE_TYPE_VALID;
		pte |= vaddr & ((ARM_TT_L1_SIZE - 1) & ARM_PTE_PAGE_MASK);
		pte &= ~ARM_PTE_HINT_MASK;
		return pte;
	}

	ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
#endif

	ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
	tte = *ttep;

	assert(tte & ARM_TTE_VALID);

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		/* This is a block mapping; return the equivalent PTE value. */
		pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
		pte |= ARM_PTE_TYPE_VALID;
		pte |= vaddr & ((ARM_TT_L2_SIZE - 1) & ARM_PTE_PAGE_MASK);
		pte &= ~ARM_PTE_HINT_MASK;
		return pte;
	}

	ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);

	ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
	pte = *ptep;
	pte &= ~ARM_PTE_HINT_MASK;
	return pte;
}
/*
 * arm_vm_prepare_kernel_el0_mappings:
 *   alloc_only: Indicates if PTE values should be copied from the EL1 kernel
 *     mappings.
 *
 * This function expands the kernel page tables to support the EL0 kernel
 * mappings, and conditionally installs the PTE values for the EL0 kernel
 * mappings (if alloc_only is false).
 */
static void
arm_vm_prepare_kernel_el0_mappings(bool alloc_only)
{
	pt_entry_t pte = 0;
	vm_offset_t start = ((vm_offset_t)&ExceptionVectorsBase) & ~PAGE_MASK;
	vm_offset_t end = (((vm_offset_t)&ExceptionVectorsEnd) + PAGE_MASK) & ~PAGE_MASK;
	vm_offset_t cur = 0;
	vm_offset_t cur_fixed = 0;

	/* Expand for/map the exception vectors in the EL0 kernel mappings. */
	for (cur = start, cur_fixed = ARM_KERNEL_PROTECT_EXCEPTION_START; cur < end; cur += ARM_PGBYTES, cur_fixed += ARM_PGBYTES) {
		/*
		 * We map the exception vectors at a different address than that
		 * of the kernelcache to avoid sharing page table pages with the
		 * kernelcache (as this may cause issues with TLB caching of
		 * page table pages).
		 */
		if (!alloc_only) {
			pte = arm_vm_kernel_pte(cur);
		}

		arm_vm_kernel_el1_map(cur_fixed, pte);
		arm_vm_kernel_el0_map(cur_fixed, pte);
	}

	__builtin_arm_dmb(DMB_ISH);
	__builtin_arm_isb(ISB_SY);

	if (!alloc_only) {
		/*
		 * If we have created the alternate exception vector mappings,
		 * the boot CPU may now switch over to them.
		 */
		set_vbar_el1(ARM_KERNEL_PROTECT_EXCEPTION_START);
		__builtin_arm_isb(ISB_SY);
	}
}
/*
 * arm_vm_populate_kernel_el0_mappings:
 *
 * This function adds all required mappings to the EL0 kernel mappings.
 */
static void
arm_vm_populate_kernel_el0_mappings(void)
{
	arm_vm_prepare_kernel_el0_mappings(FALSE);
}

/*
 * arm_vm_expand_kernel_el0_mappings:
 *
 * This function expands the kernel page tables to accommodate the EL0 kernel
 * mappings.
 */
static void
arm_vm_expand_kernel_el0_mappings(void)
{
	arm_vm_prepare_kernel_el0_mappings(TRUE);
}
#endif /* __ARM_KERNEL_PROTECT__ */
#if defined(KERNEL_INTEGRITY_KTRR)
extern void bootstrap_instructions;

/*
 * arm_replace_identity_map takes the V=P map that we construct in start.s
 * and repurposes it in order to have it map only the page we need in order
 * to turn on the MMU. This prevents us from running into issues where
 * KTRR will cause us to fault on executable block mappings that cross the
 * KTRR boundary.
 */
static void arm_replace_identity_map(boot_args * args)
{
	vm_offset_t addr;
	pmap_paddr_t paddr;

#if !__ARM64_TWO_LEVEL_PMAP__
	pmap_paddr_t l1_ptp_phys = 0;
	tt_entry_t *l1_ptp_virt = NULL;
	tt_entry_t *tte1 = NULL;
#endif
	pmap_paddr_t l2_ptp_phys = 0;
	tt_entry_t *l2_ptp_virt = NULL;
	tt_entry_t *tte2 = NULL;
	pmap_paddr_t l3_ptp_phys = 0;
	pt_entry_t *l3_ptp_virt = NULL;
	pt_entry_t *ptep = NULL;

	addr = ((vm_offset_t)&bootstrap_instructions) & ~ARM_PGMASK;
	paddr = kvtophys(addr);

	/*
	 * The V=P page tables (at the time this comment was written) start
	 * after the last bit of kernel data, and consist of 1 to 2 pages.
	 * Grab references to those pages, and allocate an L3 page.
	 */
#if !__ARM64_TWO_LEVEL_PMAP__
	l1_ptp_phys = args->topOfKernelData;
	l1_ptp_virt = (tt_entry_t *)phystokv(l1_ptp_phys);
	tte1 = &l1_ptp_virt[(((paddr) & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT)];

	l2_ptp_phys = l1_ptp_phys + ARM_PGBYTES;
#else
	l2_ptp_phys = args->topOfKernelData;
#endif
	l2_ptp_virt = (tt_entry_t *)phystokv(l2_ptp_phys);
	tte2 = &l2_ptp_virt[(((paddr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)];

	l3_ptp_virt = (pt_entry_t *)alloc_ptpage(FALSE);
	l3_ptp_phys = kvtophys((vm_offset_t)l3_ptp_virt);
	ptep = &l3_ptp_virt[(((paddr) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];

	/*
	 * Replace the large V=P mapping with a mapping that provides only the
	 * mappings needed to turn on the MMU.
	 */
#if !__ARM64_TWO_LEVEL_PMAP__
	bzero(l1_ptp_virt, ARM_PGBYTES);
	*tte1 = ARM_TTE_BOOT_TABLE | (l2_ptp_phys & ARM_TTE_TABLE_MASK);
#endif
	bzero(l2_ptp_virt, ARM_PGBYTES);
	*tte2 = ARM_TTE_BOOT_TABLE | (l3_ptp_phys & ARM_TTE_TABLE_MASK);

	*ptep = (paddr & ARM_PTE_MASK) |
	        ARM_PTE_TYPE_VALID |
	        ARM_PTE_SH(SH_OUTER_MEMORY) |
	        ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) |
	        ARM_PTE_AF |
	        ARM_PTE_AP(AP_RONA) |
	        ARM_PTE_NX;
}
#endif /* defined(KERNEL_INTEGRITY_KTRR) */
/*
 * arm_vm_page_granular_helper updates protections at the L3 level. It will (if
 * necessary) allocate a page for the L3 table and update the corresponding L2
 * entry. Then, it will iterate over the L3 table, updating protections as necessary.
 * This expects to be invoked on an L2 entry or sub-L2-entry granularity, so this should
 * not be invoked from a context that does not do L2 iteration separately (basically,
 * don't call this except from arm_vm_page_granular_prot).
 */
static void
arm_vm_page_granular_helper(vm_offset_t start, vm_offset_t _end, vm_offset_t va,
                            int pte_prot_APX, int pte_prot_XN, int forceCoarse,
                            pt_entry_t **deferred_pte, pt_entry_t *deferred_ptmp)
{
	if (va & ARM_TT_L2_OFFMASK) { /* ragged edge hanging over a ARM_TT_L2_SIZE boundary */
#if __ARM64_TWO_LEVEL_PMAP__
		tt_entry_t *tte2;
#else
		tt_entry_t *tte1, *tte2;
#endif
		tt_entry_t tmplate;
		pmap_paddr_t pa;
		pt_entry_t *ppte, *recursive_pte = NULL, ptmp, recursive_ptmp = 0;
		addr64_t ppte_phys;
		unsigned i;

		va &= ~ARM_TT_L2_OFFMASK;
		pa = va - gVirtBase + gPhysBase;

#if __ARM64_TWO_LEVEL_PMAP__
		tte2 = &cpu_tte[(((va) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)];
#else
		tte1 = &cpu_tte[(((va) & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT)];
		tte2 = &((tt_entry_t*) phystokv((*tte1) & ARM_TTE_TABLE_MASK))[(((va) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)];
#endif

		tmplate = *tte2;

		if (ARM_TTE_TYPE_TABLE == (tmplate & ARM_TTE_TYPE_MASK)) {
			/* pick up the existing page table. */
			ppte = (pt_entry_t *)phystokv((tmplate & ARM_TTE_TABLE_MASK));
		} else {
			// TTE must be reincarnated COARSE.
			ppte = (pt_entry_t*)alloc_ptpage(TRUE);
			ppte_phys = kvtophys((vm_offset_t)ppte);

			pmap_init_pte_static_page(kernel_pmap, ppte, pa);

			*tte2 = pa_to_tte(ppte_phys) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
		}

		/* Apply the desired protections to the specified page range */
		for (i = 0; i <= (ARM_TT_L3_INDEX_MASK>>ARM_TT_L3_SHIFT); i++) {
			if ((start <= va) && (va < _end)) {

				ptmp = pa | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE;
				ptmp = ptmp | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
				ptmp = ptmp | ARM_PTE_AP(pte_prot_APX);
				ptmp = ptmp | ARM_PTE_NX;
#if __ARM_KERNEL_PROTECT__
				ptmp = ptmp | ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

				if (pte_prot_XN) {
					ptmp = ptmp | ARM_PTE_PNX;
				}

				/*
				 * If we can, apply the contiguous hint to this range. The hint is
				 * applicable if we are not trying to create per-page mappings and
				 * if the current address falls within a hint-sized range that will
				 * be fully covered by this mapping request.
				 */
				if ((va >= round_up_pte_hint_address(start)) && (round_up_pte_hint_address(va + 1) < _end) &&
				    !forceCoarse && use_contiguous_hint) {
					ptmp |= ARM_PTE_HINT;
				}

				if ((pt_entry_t*)(phystokv(pa)) == ppte) {
					assert(recursive_pte == NULL);
					/* This assert should be reenabled as part of rdar://problem/30149465 */
					assert(!forceCoarse);
					recursive_pte = &ppte[i];
					recursive_ptmp = ptmp;
				} else if ((deferred_pte != NULL) && (&ppte[i] == &recursive_pte[1])) {
					assert(*deferred_pte == NULL);
					assert(deferred_ptmp != NULL);
					*deferred_pte = &ppte[i];
					*deferred_ptmp = ptmp;
				} else {
					ppte[i] = ptmp;
				}
			}

			va += ARM_PGBYTES;
			pa += ARM_PGBYTES;
		}

		if (recursive_pte != NULL)
			*recursive_pte = recursive_ptmp;
	}
}
/*
 * arm_vm_page_granular_prot updates protections by iterating over the L2 entries and
 * changing them. If a particular chunk necessitates L3 entries (for reasons of
 * alignment or length, or an explicit request that the entry be fully expanded), we
 * hand off to arm_vm_page_granular_helper to deal with the L3 chunk of the logic.
 *
 * Note that counterintuitively a forceCoarse request is a request to expand the entries
 * out to L3, i.e. to make *finer* grained mappings. That comes from historical arm32
 * nomenclature in which the 4K granule is "coarse" vs. the 1K "fine" granule (which we
 * don't use).
 */
static void
arm_vm_page_granular_prot(vm_offset_t start, unsigned long size,
                          int tte_prot_XN, int pte_prot_APX, int pte_prot_XN, int forceCoarse)
{
	pt_entry_t *deferred_pte = NULL, deferred_ptmp = 0;
	vm_offset_t _end = start + size;
	vm_offset_t align_start = (start + ARM_TT_L2_OFFMASK) & ~ARM_TT_L2_OFFMASK;

	if (align_start > _end) {
		arm_vm_page_granular_helper(start, _end, start, pte_prot_APX, pte_prot_XN, forceCoarse, NULL, NULL);
		return;
	}

	arm_vm_page_granular_helper(start, align_start, start, pte_prot_APX, pte_prot_XN, forceCoarse, &deferred_pte, &deferred_ptmp);

	while ((_end - align_start) >= ARM_TT_L2_SIZE) {
		if (forceCoarse) {
			arm_vm_page_granular_helper(align_start, align_start+ARM_TT_L2_SIZE, align_start + 1,
			                            pte_prot_APX, pte_prot_XN, forceCoarse, NULL, NULL);
		} else {
#if __ARM64_TWO_LEVEL_PMAP__
			tt_entry_t *tte2;
#else
			tt_entry_t *tte1, *tte2;
#endif
			tt_entry_t tmplate;

#if __ARM64_TWO_LEVEL_PMAP__
			tte2 = &cpu_tte[((align_start & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)];
#else
			tte1 = &cpu_tte[((align_start & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT)];
			tte2 = &((tt_entry_t*) phystokv((*tte1) & ARM_TTE_TABLE_MASK))[((align_start & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)];
#endif

			tmplate = *tte2;

			tmplate = (tmplate & ~ARM_TTE_BLOCK_APMASK) | ARM_TTE_BLOCK_AP(pte_prot_APX);
			tmplate = tmplate | ARM_TTE_BLOCK_NX;
#if __ARM_KERNEL_PROTECT__
			tmplate = tmplate | ARM_TTE_BLOCK_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
			if (tte_prot_XN) {
				tmplate = tmplate | ARM_TTE_BLOCK_PNX;
			}

			*tte2 = tmplate;
		}
		align_start += ARM_TT_L2_SIZE;
	}

	if (align_start < _end)
		arm_vm_page_granular_helper(align_start, _end, _end, pte_prot_APX, pte_prot_XN, forceCoarse, &deferred_pte, &deferred_ptmp);

	if (deferred_pte != NULL)
		*deferred_pte = deferred_ptmp;
}
static inline void
arm_vm_page_granular_RNX(vm_offset_t start, unsigned long size, int forceCoarse)
{
	arm_vm_page_granular_prot(start, size, 1, AP_RONA, 1, forceCoarse);
}

static inline void
arm_vm_page_granular_ROX(vm_offset_t start, unsigned long size, int forceCoarse)
{
	arm_vm_page_granular_prot(start, size, 0, AP_RONA, 0, forceCoarse);
}

static inline void
arm_vm_page_granular_RWNX(vm_offset_t start, unsigned long size, int forceCoarse)
{
	arm_vm_page_granular_prot(start, size, 1, AP_RWNA, 1, forceCoarse);
}

static inline void
arm_vm_page_granular_RWX(vm_offset_t start, unsigned long size, int forceCoarse)
{
	arm_vm_page_granular_prot(start, size, 0, AP_RWNA, 0, forceCoarse);
}
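/*
 * Summary of the wrappers above, read directly off the arguments they pass to
 * arm_vm_page_granular_prot: RNX is read-only/no-execute, ROX is
 * read-only/executable, RWNX is read-write/no-execute, and RWX is
 * read-write/executable. A typical call protecting a hypothetical segment
 * (segBase/segSize are placeholder names) looks like:
 *
 *   arm_vm_page_granular_ROX(segBase, segSize, FALSE);
 *
 * where the final FALSE leaves forceCoarse off, allowing block mappings where
 * alignment and size permit.
 */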
void
arm_vm_prot_init(boot_args * args)
{
	/*
	 * Enforce W^X protections on sections that have been identified so far. This will be
	 * further refined for each KEXT's TEXT and DATA segments in readPrelinkedExtensions()
	 */
	bool use_small_page_mappings = FALSE;

	/*
	 * First off, we'll create mappings for any physical memory preceding the kernel TEXT.
	 * This is memory that we want to give to the VM; this will be accomplished through an
	 * ml_static_mfree call in arm_vm_prot_finalize. This allows the pmap/vm bootstrap
	 * routines to assume they will have a physically contiguous chunk of memory to deal
	 * with during bootstrap, while reclaiming this memory later.
	 */
	arm_vm_page_granular_RWNX(gVirtBase, segPRELINKTEXTB - gVirtBase, use_small_page_mappings); // Memory for the VM

	/* Map coalesced kext TEXT segment RWNX for now */
	arm_vm_page_granular_RWNX(segPRELINKTEXTB, segSizePRELINKTEXT, FALSE); // Refined in OSKext::readPrelinkedExtensions

	/* Map coalesced kext DATA_CONST segment RWNX (could be empty) */
	arm_vm_page_granular_RWNX(segPLKDATACONSTB, segSizePLKDATACONST, FALSE); // Refined in OSKext::readPrelinkedExtensions

	/* Map coalesced kext TEXT_EXEC segment RWX (could be empty) */
	arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, FALSE); // Refined in OSKext::readPrelinkedExtensions

	/* If the new segments are not present, set the space between PRELINK_TEXT and xnu TEXT to RWNX;
	 * otherwise we no longer expect any space between the coalesced kext read-only segments and xnu ro segments.
	 */
	if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
		arm_vm_page_granular_RWNX(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT), FALSE);
	} else {
		/*
		 * If we have the new segments, we should still protect the gap between kext
		 * read-only pages and kernel read-only pages, in the event that this gap
		 * exists.
		 */
		if ((segPLKDATACONSTB + segSizePLKDATACONST) < segTEXTB) {
			arm_vm_page_granular_RWNX(segPLKDATACONSTB + segSizePLKDATACONST, segTEXTB - (segPLKDATACONSTB + segSizePLKDATACONST), FALSE);
		}
	}

	/*
	 * Protection on kernel text is loose here to allow shenanigans early on. These
	 * protections are tightened in arm_vm_prot_finalize(). This is necessary because
	 * we currently patch LowResetVectorBase in cpu.c.
	 *
	 * TEXT segment contains mach headers and other non-executable data. This will become RONX later.
	 */
	arm_vm_page_granular_RNX(segTEXTB, segSizeTEXT, FALSE);

	/* Can DATACONST start out and stay RNX?
	 * NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
	 * Make RNX in prot_finalize
	 */
	arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, FALSE);

	/* TEXTEXEC contains read only executable code: becomes ROX in prot_finalize */
	arm_vm_page_granular_RWX(segTEXTEXECB, segSizeTEXTEXEC, FALSE);

	/* DATA segment will remain RWNX */
	arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, FALSE);

	arm_vm_page_granular_ROX(segKLDB, segSizeKLD, FALSE);
	arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, FALSE);
	arm_vm_page_granular_ROX(segLASTB, segSizeLAST, FALSE); // __LAST may be empty, but we cannot assume this

	arm_vm_page_granular_RWNX(segPRELINKDATAB, segSizePRELINKDATA, FALSE); // Prelink __DATA for kexts (RW data)

	if (segSizePLKLLVMCOV > 0)
		arm_vm_page_granular_RWNX(segPLKLLVMCOVB, segSizePLKLLVMCOV, FALSE); // LLVM code coverage data

	arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, use_small_page_mappings); // Coalesced kext LINKEDIT segment

	arm_vm_page_granular_RWNX(segPRELINKINFOB, segSizePRELINKINFO, FALSE); /* PreLinkInfoDictionary */
	arm_vm_page_granular_RWNX(end_kern, phystokv(args->topOfKernelData) - end_kern, use_small_page_mappings); /* Device Tree, RAM Disk (if present), bootArgs */

	/*
	 * This is offset by 4 pages to make room for the boot page tables; we could probably
	 * include them in the overall mapping, but we'll be paranoid for now.
	 */
	vm_offset_t extra = 0;
#if KASAN
	/* add the KASAN stolen memory to the physmap */
	extra = shadow_ptop - shadow_pbase;
#endif
	/* record the extent of the physmap */
	physmap_vbase = phystokv(args->topOfKernelData) + ARM_PGBYTES * 4;
	physmap_vtop = static_memory_end;

	arm_vm_page_granular_RNX(phystokv(args->topOfKernelData), ARM_PGBYTES * 4, FALSE); // Boot page tables; they should not be mutable.
	arm_vm_page_granular_RWNX(phystokv(args->topOfKernelData) + ARM_PGBYTES * 4,
	                          extra + static_memory_end - ((phystokv(args->topOfKernelData) + ARM_PGBYTES * 4)), use_small_page_mappings); // rest of physmem
}
void
arm_vm_prot_finalize(boot_args * args)
{
	/*
	 * At this point, we are far enough along in the boot process that it will be
	 * safe to free up all of the memory preceding the kernel. It may in fact
	 * be safe to do this earlier.
	 *
	 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
	 * as usable.
	 */

	/*
	 * If the old style PRELINK segment exists, free memory before it, and after it before XNU text;
	 * otherwise we're dealing with a new style kernel cache, so we should just free the
	 * memory before the PRELINK_TEXT segment, since the rest of the KEXT read only data segments
	 * should be immediately followed by XNU's TEXT segment.
	 */

	ml_static_mfree(gVirtBase, segPRELINKTEXTB - gVirtBase);

	if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
		/* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
		ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
	}

	/*
	 * LowResetVectorBase patching should be done by now, so tighten executable
	 * protections.
	 */
	arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, FALSE);

	/* tighten permissions on kext read only data and code */
	if (segSizePLKDATACONST && segSizePLKTEXTEXEC) {
		arm_vm_page_granular_RNX(segPRELINKTEXTB, segSizePRELINKTEXT, FALSE);
		arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, FALSE);
		arm_vm_page_granular_RNX(segPLKDATACONSTB, segSizePLKDATACONST, FALSE);
	}

#if __ARM_KERNEL_PROTECT__
	arm_vm_populate_kernel_el0_mappings();
#endif /* __ARM_KERNEL_PROTECT__ */

#if defined(KERNEL_INTEGRITY_KTRR)
	/*
	 * __LAST,__pinst should no longer be executable.
	 */
	arm_vm_page_granular_RNX(segLASTB, segSizeLAST, FALSE);

	/*
	 * Must wait until all other region permissions are set before locking down DATA_CONST
	 * as the kernel static page tables live in DATA_CONST on KTRR enabled systems
	 * and will become immutable.
	 */
#endif

	arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, FALSE);

#ifndef __ARM_L1_PTW__
	FlushPoC_Dcache();
#endif
	flush_mmu_tlb();
}
#define TBI_USER 0x1
#define TBI_KERNEL 0x2

boolean_t user_tbi = TRUE;

/*
 * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of
 * address accesses. It can be enabled separately for TTBR0 (user) and
 * TTBR1 (kernel). We enable it by default for user only, but allow both
 * to be controlled by the 'tbi' boot-arg.
 */
static void
set_tbi(void)
{
#if !__ARM_KERNEL_PROTECT__
	/* If we are not built with __ARM_KERNEL_PROTECT__, TBI can be turned
	 * off with a boot-arg.
	 */
	uint64_t old_tcr, new_tcr;
	int tbi = 0;

	if (PE_parse_boot_argn("tbi", &tbi, sizeof(tbi)))
		user_tbi = ((tbi & TBI_USER) == TBI_USER);
	old_tcr = new_tcr = get_tcr();
	new_tcr |= (user_tbi) ? TCR_TBI0_TOPBYTE_IGNORED : 0;
	new_tcr |= (tbi & TBI_KERNEL) ? TCR_TBI1_TOPBYTE_IGNORED : 0;

	if (old_tcr != new_tcr) {
		set_tcr(new_tcr);
		sysreg_restore.tcr_el1 = new_tcr;
	}
#endif /* !__ARM_KERNEL_PROTECT__ */
}
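/*
 * Illustrative decoding of the 'tbi' boot-arg handled above, following the
 * TBI_USER/TBI_KERNEL bit definitions: tbi=1 keeps the default user-only
 * top-byte ignore, tbi=2 requests kernel-only, tbi=3 sets both TCR_TBI0 and
 * TCR_TBI1, and tbi=0 clears the user bit, disabling TBI for user addresses
 * (only on builds without __ARM_KERNEL_PROTECT__, where this path is compiled).
 */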
void
arm_vm_init(uint64_t memory_size, boot_args * args)
{
#if !__ARM64_TWO_LEVEL_PMAP__
	vm_map_address_t va_l1, va_l1_end;
	pmap_paddr_t     pa_l1;
	tt_entry_t       *cpu_l1_tte;
#else
	/*
	 * If we are using two level page tables, rather than the
	 * 3 level page tables that xnu defaults to for ARM64,
	 * then a great deal of the code in this path becomes
	 * redundant. As a result, most of the logic having to
	 * do with L1 pages will be excluded from such
	 * configurations in this function.
	 */
#endif
	vm_map_address_t va_l2, va_l2_end;
	pmap_paddr_t     pa_l2;
	tt_entry_t       *cpu_l2_tte;
	pmap_paddr_t     boot_ttep;
	tt_entry_t       *boot_tte;
	uint64_t         mem_segments;
	vm_offset_t      ptpage_vaddr;

	/*
	 * Get the virtual and physical memory base from boot_args.
	 */
	gVirtBase = args->virtBase;
	gPhysBase = args->physBase;
	gPhysSize = args->memSize;
	mem_size = args->memSize;
	if ((memory_size != 0) && (mem_size > memory_size))
		mem_size = memory_size;
	if (mem_size > MEM_SIZE_MAX)
		mem_size = MEM_SIZE_MAX;
	static_memory_end = gVirtBase + mem_size;

	boot_ttep = args->topOfKernelData;
	boot_tte = (tt_entry_t *) phystokv(boot_ttep);

	/*
	 *  TTBR0 L1, TTBR0 L2 - 1:1 bootstrap mapping.
	 *  TTBR1 L1, TTBR1 L2 - kernel mapping
	 */
	avail_start = boot_ttep + 4*ARM_PGBYTES;

#if defined(KERNEL_INTEGRITY_KTRR)
	arm_replace_identity_map(args);
#endif

	/* Initialize invalid tte page */
	invalid_tte = (tt_entry_t *)alloc_ptpage(TRUE);
	invalid_ttep = kvtophys((vm_offset_t)invalid_tte);
	bzero(invalid_tte, ARM_PGBYTES);

	/*
	 * Initialize l1 page table page
	 */
#if __ARM64_TWO_LEVEL_PMAP__
	/*
	 * If we're using a two level page table, we still need to
	 * set the cpu_ttep to avail_start, as this will be the root
	 * of our page table regardless of how many levels we are
	 * using.
	 */
#endif
	cpu_tte = (tt_entry_t *)alloc_ptpage(TRUE);
	cpu_ttep = kvtophys((vm_offset_t)cpu_tte);
	bzero(cpu_tte, ARM_PGBYTES);
	avail_end = gPhysBase + mem_size;

	/*
	 * Initialize l1 and l2 page table pages :
	 *   map physical memory at the kernel base virtual address
	 *   cover the kernel dynamic address range section
	 *
	 *   the so called physical aperture should be statically mapped
	 */
#if !__ARM64_TWO_LEVEL_PMAP__
	pa_l1 = gPhysBase;
	va_l1 = gVirtBase;
	va_l1_end = gVirtBase + mem_size;
#if KASAN
	/* add the KASAN stolen memory to the physmap */
	va_l1_end = gVirtBase + (shadow_ptop - gPhysBase);
#endif
	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);

	while (va_l1 < va_l1_end) {
		tt_entry_t *new_tte = (tt_entry_t *)alloc_ptpage(TRUE);
		/* Allocate a page and setup L1 Table TTE in L1 */
		*cpu_l1_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
		bzero((void *)new_tte, ARM_PGBYTES);

		va_l2 = va_l1;

		if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) {
			/* If this is the last L1 entry, it must cover the last mapping. */
			va_l2_end = va_l1_end;
		} else {
			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end);
		}

		pa_l2 = pa_l1;
		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l1 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
#else
		va_l2 = gVirtBase;
		va_l2_end = gVirtBase + mem_size;
		pa_l2 = gPhysBase;
		cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);

#if KASAN
		/* add the KASAN stolen memory to the physmap */
		va_l2_end = gVirtBase + (shadow_ptop - gPhysBase);
#endif
#endif

		while (va_l2 < va_l2_end) {
			/* Set up L2 Block TTE in L2 */
			*cpu_l2_tte = (pa_l2 & ARM_TTE_BLOCK_L2_MASK) | ARM_TTE_TYPE_BLOCK
			              | ARM_TTE_VALID | ARM_TTE_BLOCK_AF
			              | ARM_TTE_BLOCK_AP(AP_RWNA) | ARM_TTE_BLOCK_SH(SH_OUTER_MEMORY)
			              | ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if __ARM_KERNEL_PROTECT__
			*cpu_l2_tte |= ARM_TTE_BLOCK_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
			va_l2 += ARM_TT_L2_SIZE;
			pa_l2 += ARM_TT_L2_SIZE;
			cpu_l2_tte++;
		}
#if !__ARM64_TWO_LEVEL_PMAP__
		cpu_l1_tte++;
		va_l1 = va_l2;
		pa_l1 = pa_l2;
	}
#endif

#if __ARM_KERNEL_PROTECT__
	/* Expand the page tables to prepare for the EL0 mappings. */
	arm_vm_expand_kernel_el0_mappings();
#endif /* __ARM_KERNEL_PROTECT__ */

	/*
	 * Now retrieve addresses for end, edata, and etext from MACH-O headers
	 */
	segPRELINKTEXTB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
	segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
	segPLKTEXTEXECB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
	segTEXTB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
	segDATACONSTB    = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
	segTEXTEXECB     = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
	segDATAB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
	segLINKB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
	segKLDB          = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
	segPRELINKDATAB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
	segPRELINKINFOB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
	segPLKLLVMCOVB   = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
	segPLKLINKEDITB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
	segLASTB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);

	(void) PE_parse_boot_argn("use_contiguous_hint", &use_contiguous_hint, sizeof(use_contiguous_hint));
	assert(segSizePRELINKTEXT < 0x03000000); /* 23355738 */

	/* if one of the new segments is present, the other one better be as well */
	if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
		assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
	}

	etext = (vm_offset_t) segTEXTB + segSizeTEXT;
	sdata = (vm_offset_t) segDATAB;
	edata = (vm_offset_t) segDATAB + segSizeDATA;
	end_kern = round_page(getlastaddr()); /* Force end to next page */

	vm_kernel_base = segTEXTB;
	vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
	vm_kext_base = segPRELINKTEXTB;
	vm_kext_top = vm_kext_base + segSizePRELINKTEXT;

	vm_prelink_stext = segPRELINKTEXTB;
	if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
		vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
	} else {
		vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
	}
	vm_prelink_sinfo = segPRELINKINFOB;
	vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
	vm_slinkedit = segLINKB;
	vm_elinkedit = segLINKB + segSizeLINK;

	vm_prelink_sdata = segPRELINKDATAB;
	vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;

	arm_vm_prot_init(args);

	/*
	 * Initialize the page tables for the low globals:
	 *   cover this address range:
	 *     LOW_GLOBAL_BASE_ADDRESS + 2MB
	 */
#if __ARM64_TWO_LEVEL_PMAP__
	va_l2 = LOW_GLOBAL_BASE_ADDRESS;
	cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
#else
	va_l1 = va_l2 = LOW_GLOBAL_BASE_ADDRESS;
	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
	cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
#endif
	ptpage_vaddr = alloc_ptpage(TRUE);
	*cpu_l2_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
	bzero((void *)ptpage_vaddr, ARM_PGBYTES);

	/*
	 * Initialize l2 page table pages :
	 *   cover this address range:
	 *    KERNEL_DYNAMIC_ADDR - VM_MAX_KERNEL_ADDRESS
	 */
#if !__ARM64_TWO_LEVEL_PMAP__
	va_l1 = (gVirtBase+MEM_SIZE_MAX+ ~0xFFFFFFFFFF800000ULL) & 0xFFFFFFFFFF800000ULL;
	va_l1_end = VM_MAX_KERNEL_ADDRESS;
	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);

	while (va_l1 < va_l1_end) {
		if (*cpu_l1_tte == ARM_TTE_EMPTY) {
			/* Allocate a page and setup L1 Table TTE in L1 */
			ptpage_vaddr = alloc_ptpage(TRUE);
			*cpu_l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
			bzero((void *)ptpage_vaddr, ARM_PGBYTES);
		}

		if ((va_l1 + ARM_TT_L1_SIZE) < va_l1) {
			/* If this is the last L1 entry, it must cover the last mapping. */
			break;
		}

		va_l1 += ARM_TT_L1_SIZE;
		cpu_l1_tte++;
	}
#endif

	set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
	set_mmu_ttb_alternate(cpu_ttep & TTBR_BADDR_MASK);
	set_tbi();
	flush_mmu_tlb();

	/*
	 * TODO: We're hardcoding the expected virtual TEXT base here;
	 * that gives us an ugly dependency on a linker argument in
	 * the make files. Clean this up, so we don't hardcode it
	 * twice; this is nothing but trouble.
	 */
	sane_size = mem_size - (avail_start - gPhysBase);
	max_mem = mem_size;
	vm_kernel_slid_base = segPRELINKTEXTB;
	vm_kernel_slid_top = vm_prelink_einfo;
	vm_kernel_slide = segTEXTB-0xfffffff007004000;
	vm_kernel_stext = segTEXTB;
	assert(segDATACONSTB == segTEXTB + segSizeTEXT);
	assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
	vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;

	pmap_bootstrap((gVirtBase+MEM_SIZE_MAX+ ~0xFFFFFFFFFF800000ULL) & 0xFFFFFFFFFF800000ULL);
	/*
	 * Initialize l3 page table pages :
	 *   cover this address range:
	 *    2MB + FrameBuffer size + 10MB for each 256MB segment
	 */

	mem_segments = (mem_size + 0x0FFFFFFF) >> 28;

#if !__ARM64_TWO_LEVEL_PMAP__
	va_l1 = (gVirtBase+MEM_SIZE_MAX+ ~0xFFFFFFFFFF800000ULL) & 0xFFFFFFFFFF800000ULL;
	va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
	va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
	va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;

	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);

	while (va_l1 < va_l1_end) {

		va_l2 = va_l1;

		if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) {
			/* If this is the last L1 entry, it must cover the last mapping. */
			va_l2_end = va_l1_end;
		} else {
			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end);
		}

		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
#else
		va_l2 = (gVirtBase+MEM_SIZE_MAX+ ~0xFFFFFFFFFF800000ULL) & 0xFFFFFFFFFF800000ULL;
		va_l2_end = va_l2 + ((2 + (mem_segments * 10)) << 20);
		va_l2_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
		va_l2_end = (va_l2_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
		cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
#endif

		while (va_l2 < va_l2_end) {
			pt_entry_t *   ptp;
			pmap_paddr_t   ptp_phys;

			/* Allocate a page and setup L3 Table TTE in L2 */
			ptp = (pt_entry_t *) alloc_ptpage(FALSE);
			ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);

			pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);

			*cpu_l2_tte = (pa_to_tte (ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;

			va_l2 += ARM_TT_L2_SIZE;
			cpu_l2_tte++;
		};
#if !__ARM64_TWO_LEVEL_PMAP__
		va_l1 = va_l2_end;
		cpu_l1_tte++;
	}
#endif

	/*
	 * Initialize l3 page table pages :
	 *   cover this address range:
	 *   (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VM_MAX_KERNEL_ADDRESS
	 */
#if !__ARM64_TWO_LEVEL_PMAP__
	va_l1 = VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK;
	va_l1_end = VM_MAX_KERNEL_ADDRESS;

	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);

	while (va_l1 < va_l1_end) {

		va_l2 = va_l1;

		if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) {
			/* If this is the last L1 entry, it must cover the last mapping. */
			va_l2_end = va_l1_end;
		} else {
			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end);
		}

		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
#else
		va_l2 = VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK;
		va_l2_end = VM_MAX_KERNEL_ADDRESS;
		cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
#endif

		while (va_l2 < va_l2_end) {
			pt_entry_t *   ptp;
			pmap_paddr_t   ptp_phys;

			/* Allocate a page and setup L3 Table TTE in L2 */
			ptp = (pt_entry_t *) alloc_ptpage(FALSE);
			ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);

			pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);

			*cpu_l2_tte = (pa_to_tte (ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;

			va_l2 += ARM_TT_L2_SIZE;
			cpu_l2_tte++;
		};
#if !__ARM64_TWO_LEVEL_PMAP__
		va_l1 = va_l2_end;
		cpu_l1_tte++;
	}
#endif

#if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
	/*
	 * In this configuration, the bootstrap mappings (arm_vm_init) and
	 * the heap mappings occupy separate L1 regions. Explicitly set up
	 * the heap L1 allocations here.
	 */
	va_l1 = VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK;
	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);

	while ((va_l1 >= (VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK)) && (va_l1 < VM_MAX_KERNEL_ADDRESS)) {
		/*
		 * If the L1 entry has not yet been allocated, allocate it
		 * now and treat it as a heap table.
		 */
		if (*cpu_l1_tte == ARM_TTE_EMPTY) {
			tt_entry_t *new_tte = (tt_entry_t*)alloc_ptpage(FALSE);
			bzero(new_tte, ARM_PGBYTES);
			*cpu_l1_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
		}

		cpu_l1_tte++;
		va_l1 += ARM_TT_L1_SIZE;
	}
#endif

	/*
	 * Adjust avail_start so that the range that the VM owns
	 * starts on a PAGE_SIZE aligned boundary.
	 */
	avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;

	first_avail = avail_start;
	patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData);
}