]>
Commit | Line | Data |
---|---|---|
55e303ae | 1 | /* |
0a7de745 | 2 | * Copyright (c) 2003-2019 Apple Inc. All rights reserved. |
55e303ae | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
0a7de745 | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
0a7de745 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
0a7de745 | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
0a7de745 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
55e303ae A |
27 | */ |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
0a7de745 | 31 | /* |
55e303ae A |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
0a7de745 | 35 | * |
55e303ae A |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
0a7de745 | 41 | * |
55e303ae A |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
0a7de745 | 45 | * |
55e303ae | 46 | * Carnegie Mellon requests users of this software to return to |
0a7de745 | 47 | * |
55e303ae A |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
0a7de745 | 52 | * |
55e303ae A |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | ||
55e303ae A |
57 | |
58 | #include <mach/i386/vm_param.h> | |
59 | ||
60 | #include <string.h> | |
0a7de745 | 61 | #include <stdint.h> |
55e303ae A |
62 | #include <mach/vm_param.h> |
63 | #include <mach/vm_prot.h> | |
64 | #include <mach/machine.h> | |
65 | #include <mach/time_value.h> | |
55e303ae A |
66 | #include <kern/spl.h> |
67 | #include <kern/assert.h> | |
68 | #include <kern/debug.h> | |
69 | #include <kern/misc_protos.h> | |
70 | #include <kern/startup.h> | |
71 | #include <kern/clock.h> | |
0c530ab8 | 72 | #include <kern/pms.h> |
55e303ae A |
73 | #include <kern/cpu_data.h> |
74 | #include <kern/processor.h> | |
fe8ab488 | 75 | #include <sys/kdebug.h> |
0c530ab8 | 76 | #include <console/serial_protos.h> |
55e303ae A |
77 | #include <vm/vm_page.h> |
78 | #include <vm/pmap.h> | |
79 | #include <vm/vm_kern.h> | |
6d2010ae | 80 | #include <machine/pal_routines.h> |
55e303ae A |
81 | #include <i386/fpu.h> |
82 | #include <i386/pmap.h> | |
55e303ae | 83 | #include <i386/misc_protos.h> |
b0d623f7 | 84 | #include <i386/cpu_threads.h> |
55e303ae | 85 | #include <i386/cpuid.h> |
b0d623f7 | 86 | #include <i386/lapic.h> |
55e303ae | 87 | #include <i386/mp.h> |
0c530ab8 | 88 | #include <i386/mp_desc.h> |
6d2010ae | 89 | #if CONFIG_MTRR |
b0d623f7 | 90 | #include <i386/mtrr.h> |
6d2010ae | 91 | #endif |
91447636 | 92 | #include <i386/machine_routines.h> |
b0d623f7 | 93 | #if CONFIG_MCA |
0c530ab8 | 94 | #include <i386/machine_check.h> |
b0d623f7 | 95 | #endif |
6d2010ae | 96 | #include <i386/ucode.h> |
91447636 | 97 | #include <i386/postcode.h> |
0c530ab8 A |
98 | #include <i386/Diagnostics.h> |
99 | #include <i386/pmCPU.h> | |
100 | #include <i386/tsc.h> | |
2d21ac55 | 101 | #include <i386/locks.h> /* LcksOpts */ |
6d2010ae A |
102 | #if DEBUG |
103 | #include <machine/pal_routines.h> | |
104 | #endif | |
5ba3f43e A |
105 | |
106 | #if MONOTONIC | |
107 | #include <kern/monotonic.h> | |
108 | #endif /* MONOTONIC */ | |
109 | ||
110 | #include <san/kasan.h> | |
111 | ||
b0d623f7 | 112 | #if DEBUG |
0a7de745 | 113 | #define DBG(x ...) kprintf(x) |
b0d623f7 | 114 | #else |
0a7de745 | 115 | #define DBG(x ...) |
b0d623f7 | 116 | #endif |
55e303ae | 117 | |
/* Non-zero while a task is being debugged; consulted by low-level trap code. */
int debug_task;

/* Cleared at the end of vstart(); lets early code detect pre-VM boot phase. */
int early_boot = 1;

/* Boot-loader argument block; physical pointer early, virtual after remap in vstart(). */
static boot_args *kernelBootArgs;

extern int disableConsoleOutput;
extern const char version[];
extern const char version_variant[];
extern int nx_enabled;

/*
 * Set initial values so that ml_phys_* routines can use the booter's ID mapping
 * to touch physical space before the kernel's physical aperture exists.
 */
uint64_t physmap_base = 0;
uint64_t physmap_max = 4 * GB;

/* The "idle" bootstrap page-table hierarchy, allocated in Idle_PTs_init(). */
pd_entry_t *KPTphys;            /* level 1 (4K PTE pages) */
pd_entry_t *IdlePTD;            /* level 2 */
pdpt_entry_t *IdlePDPT;         /* level 3 */
pml4_entry_t *IdlePML4;         /* level 4 */

/* First PML4 slot of the physmap and how many slots it occupies (set in physmap_init()). */
int kernPhysPML4Index;
int kernPhysPML4EntryCount;

/*
 * These are 4K mapping page table pages from KPTphys[] that we wound
 * up not using. They get ml_static_mfree()'d once the VM is initialized.
 */
ppnum_t released_PT_ppn = 0;
uint32_t released_PT_cnt = 0;

/* Bump allocator cursor for early physical pages; advanced by ALLOCPAGES(). */
char *physfree;
void idt64_remap(void);
b0d623f7 A |
153 | |
154 | /* | |
155 | * Note: ALLOCPAGES() can only be used safely within Idle_PTs_init() | |
156 | * due to the mutation of physfree. | |
157 | */ | |
158 | static void * | |
159 | ALLOCPAGES(int npages) | |
160 | { | |
161 | uintptr_t tmp = (uintptr_t)physfree; | |
162 | bzero(physfree, npages * PAGE_SIZE); | |
163 | physfree += npages * PAGE_SIZE; | |
b0d623f7 | 164 | tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK; |
b0d623f7 A |
165 | return (void *)tmp; |
166 | } | |
167 | ||
168 | static void | |
169 | fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count) | |
170 | { | |
171 | int i; | |
0a7de745 | 172 | for (i = 0; i < count; i++) { |
b0d623f7 A |
173 | base[index] = src | prot | INTEL_PTE_VALID; |
174 | src += PAGE_SIZE; | |
175 | index++; | |
176 | } | |
177 | } | |
178 | ||
/* First physical address available to the VM after the early allocations. */
extern pmap_paddr_t first_avail;

/* Debug aid: a writable flag a debugger can flip to break on kprintf paths. */
int break_kprintf = 0;
182 | ||
183 | uint64_t | |
184 | x86_64_pre_sleep(void) | |
185 | { | |
186 | IdlePML4[0] = IdlePML4[KERNEL_PML4_INDEX]; | |
6d2010ae A |
187 | uint64_t oldcr3 = get_cr3_raw(); |
188 | set_cr3_raw((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4)); | |
b0d623f7 A |
189 | return oldcr3; |
190 | } | |
191 | ||
/*
 * Undo x86_64_pre_sleep(): clear the temporary low identity alias in
 * IdlePML4 slot 0, then restore the CR3 value saved before sleep.
 * Note: only the low 32 bits of new_cr3 are loaded here (cast to uint32_t),
 * matching the pre-existing bootstrap convention.
 */
void
x86_64_post_sleep(uint64_t new_cr3)
{
	IdlePML4[0] = 0;
	set_cr3_raw((uint32_t) new_cr3);
}
198 | ||
b0d623f7 | 199 | |
b0d623f7 | 200 | |
55e303ae | 201 | |
b0d623f7 A |
202 | // Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address |
203 | // NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account | |
204 | // the PCI hole (which is less 4GB but not more). | |
7ddcb079 | 205 | |
0a7de745 A |
206 | static int |
207 | physmap_init_L2(uint64_t *physStart, pt_entry_t **l2ptep) | |
208 | { | |
209 | unsigned i; | |
210 | pt_entry_t *physmapL2 = ALLOCPAGES(1); | |
211 | ||
212 | if (physmapL2 == NULL) { | |
213 | DBG("physmap_init_L2 page alloc failed when initting L2 for physAddr 0x%llx.\n", *physStart); | |
214 | *l2ptep = NULL; | |
215 | return -1; | |
216 | } | |
217 | ||
218 | for (i = 0; i < NPDPG; i++) { | |
219 | physmapL2[i] = *physStart | |
220 | | INTEL_PTE_PS | |
221 | | INTEL_PTE_VALID | |
222 | | INTEL_PTE_NX | |
223 | | INTEL_PTE_WRITE; | |
224 | ||
225 | *physStart += NBPD; | |
226 | } | |
227 | *l2ptep = physmapL2; | |
228 | return 0; | |
229 | } | |
230 | ||
231 | static int | |
232 | physmap_init_L3(int startIndex, uint64_t highest_phys, uint64_t *physStart, pt_entry_t **l3ptep) | |
233 | { | |
234 | unsigned i; | |
235 | int ret; | |
236 | pt_entry_t *l2pte; | |
237 | pt_entry_t *physmapL3 = ALLOCPAGES(1); /* ALLOCPAGES bzeroes the memory */ | |
238 | ||
239 | if (physmapL3 == NULL) { | |
240 | DBG("physmap_init_L3 page alloc failed when initting L3 for physAddr 0x%llx.\n", *physStart); | |
241 | *l3ptep = NULL; | |
242 | return -1; | |
243 | } | |
244 | ||
245 | for (i = startIndex; i < NPDPTPG && *physStart < highest_phys; i++) { | |
246 | if ((ret = physmap_init_L2(physStart, &l2pte)) < 0) { | |
247 | return ret; | |
248 | } | |
249 | ||
250 | physmapL3[i] = ((uintptr_t)ID_MAP_VTOP(l2pte)) | |
251 | | INTEL_PTE_VALID | |
252 | | INTEL_PTE_NX | |
253 | | INTEL_PTE_WRITE; | |
254 | } | |
255 | ||
256 | *l3ptep = physmapL3; | |
257 | ||
258 | return 0; | |
259 | } | |
316670eb | 260 | |
/*
 * Build the kernel physmap: a high-VA window mapping all of physical memory
 * (plus MMIO headroom) with 2MB pages, slid by phys_random_L3 entries of
 * entropy at the L3 level. Writes the resulting VA window bounds to
 * *new_physmap_base / *new_physmap_max; the caller installs them into the
 * physmap_base/physmap_max globals just before switching CR3.
 */
static void
physmap_init(uint8_t phys_random_L3, uint64_t *new_physmap_base, uint64_t *new_physmap_max)
{
	pt_entry_t *l3pte;
	int pml4_index, i;
	int L3_start_index;
	uint64_t physAddr = 0;
	uint64_t highest_physaddr;
	unsigned pdpte_count;

#if DEVELOPMENT || DEBUG
	if (kernelBootArgs->PhysicalMemorySize > K64_MAXMEM) {
		panic("Installed physical memory exceeds configured maximum.");
	}
#endif

	/*
	 * Add 4GB to the loader-provided physical memory size to account for MMIO space
	 * XXX in a perfect world, we'd scan PCI buses and count the max memory requested in BARs by
	 * XXX all enumerated device, then add more for hot-pluggable devices.
	 */
	highest_physaddr = kernelBootArgs->PhysicalMemorySize + 4 * GB;

	/*
	 * Calculate the number of PML4 entries we'll need. The total number of entries is
	 * pdpte_count = (((highest_physaddr) >> PDPT_SHIFT) + entropy_value +
	 *                ((highest_physaddr & PDPT_MASK) == 0 ? 0 : 1))
	 * pml4e_count = pdpte_count >> (PML4_SHIFT - PDPT_SHIFT)
	 */
	assert(highest_physaddr < (UINT64_MAX - PDPTMASK));
	/* Round up to whole PDPTEs, then add the random L3 slide to the count. */
	pdpte_count = (unsigned) (((highest_physaddr + PDPTMASK) >> PDPTSHIFT) + phys_random_L3);
	/* Round the PDPTE count up to whole PML4 entries. */
	kernPhysPML4EntryCount = (pdpte_count + ((1U << (PML4SHIFT - PDPTSHIFT)) - 1)) >> (PML4SHIFT - PDPTSHIFT);
	if (kernPhysPML4EntryCount == 0) {
		kernPhysPML4EntryCount = 1;
	}
	if (kernPhysPML4EntryCount > KERNEL_PHYSMAP_PML4_COUNT_MAX) {
#if DEVELOPMENT || DEBUG
		panic("physmap too large");
#else
		/* Production kernels clamp instead of refusing to boot. */
		kprintf("[pmap] Limiting physmap to %d PML4s (was %d)\n", KERNEL_PHYSMAP_PML4_COUNT_MAX,
		    kernPhysPML4EntryCount);
		kernPhysPML4EntryCount = KERNEL_PHYSMAP_PML4_COUNT_MAX;
#endif
	}

	/* The physmap sits immediately below the kext region in PML4 space. */
	kernPhysPML4Index = KERNEL_KEXTS_INDEX - kernPhysPML4EntryCount; /* utb: KERNEL_PHYSMAP_PML4_INDEX */

	/*
	 * XXX: Make sure that the addresses returned for physmapL3 and physmapL2 plus their extents
	 * are in the system-available memory range
	 */


	/* We assume NX support. Mark all levels of the PHYSMAP NX
	 * to avoid granting executability via a single bit flip.
	 */
#if DEVELOPMENT || DEBUG
	/* Verify the NX/XD assumption via CPUID extended feature flags. */
	uint32_t reg[4];
	do_cpuid(0x80000000, reg);
	if (reg[eax] >= 0x80000001) {
		do_cpuid(0x80000001, reg);
		assert(reg[edx] & CPUID_EXTFEATURE_XD);
	}
#endif /* DEVELOPMENT || DEBUG */

	/* Only the first L3 table starts at the slid index; the rest start at 0. */
	L3_start_index = phys_random_L3;

	for (pml4_index = kernPhysPML4Index;
	    pml4_index < (kernPhysPML4Index + kernPhysPML4EntryCount) && physAddr < highest_physaddr;
	    pml4_index++) {
		if (physmap_init_L3(L3_start_index, highest_physaddr, &physAddr, &l3pte) < 0) {
			panic("Physmap page table initialization failed");
			/* NOTREACHED */
		}

		L3_start_index = 0;

		IdlePML4[pml4_index] = ((uintptr_t)ID_MAP_VTOP(l3pte))
		    | INTEL_PTE_VALID
		    | INTEL_PTE_NX
		    | INTEL_PTE_WRITE;
	}

	*new_physmap_base = KVADDR(kernPhysPML4Index, phys_random_L3, 0, 0);
	/*
	 * physAddr contains the last-mapped physical address, so that's what we
	 * add to physmap_base to derive the ending VA for the physmap.
	 */
	*new_physmap_max = *new_physmap_base + physAddr;

	DBG("Physical address map base: 0x%qx\n", *new_physmap_base);
	for (i = kernPhysPML4Index; i < (kernPhysPML4Index + kernPhysPML4EntryCount); i++) {
		DBG("Physical map idlepml4[%d]: 0x%llx\n", i, IdlePML4[i]);
	}
}
6d2010ae | 356 | |
void doublemap_init(uint8_t);

/*
 * Build the kernel's own ("idle") 4-level page tables, install the slid
 * physmap and doublemap, remap the IDT, and finally switch CR3 to the new
 * hierarchy. Must run on the boot CPU before any physmap-dependent code.
 */
static void
Idle_PTs_init(void)
{
	uint64_t rand64;
	uint64_t new_physmap_base, new_physmap_max;

	/* Allocate the "idle" kernel page tables: */
	KPTphys  = ALLOCPAGES(NKPT);    /* level 1 */
	IdlePTD  = ALLOCPAGES(NPGPTD);  /* level 2 */
	IdlePDPT = ALLOCPAGES(1);       /* level 3 */
	IdlePML4 = ALLOCPAGES(1);       /* level 4 */

	// Fill the lowest level with everything up to physfree
	fillkpt(KPTphys,
	    INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));

	/* IdlePTD */
	fillkpt(IdlePTD,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);

	// IdlePDPT entries
	fillkpt(IdlePDPT,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);

	// IdlePML4 single entry for kernel space.
	fillkpt(IdlePML4 + KERNEL_PML4_INDEX,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePDPT), 0, 1);

	postcode(VSTART_PHYSMAP_INIT);

	/*
	 * early_random() cannot be called more than one time before the cpu's
	 * gsbase is initialized, so use the full 64-bit value to extract the
	 * two 8-bit entropy values needed for address randomization.
	 */
	rand64 = early_random();
	physmap_init(rand64 & 0xFF, &new_physmap_base, &new_physmap_max);
	doublemap_init((rand64 >> 8) & 0xFF);
	idt64_remap();

	postcode(VSTART_SET_CR3);

	/*
	 * Switch to the page tables. We set physmap_base and physmap_max just
	 * before switching to the new page tables to avoid someone calling
	 * kprintf() or otherwise using physical memory in between.
	 * This is needed because kprintf() writes to physical memory using
	 * ml_phys_read_data and PHYSMAP_PTOV, which requires physmap_base to be
	 * set correctly.
	 */
	physmap_base = new_physmap_base;
	physmap_max = new_physmap_max;
	set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
}
413 | ||
cb323159 A |
414 | /* |
415 | * Release any still unused, preallocated boot kernel page tables. | |
416 | * start..end is the VA range currently unused. | |
417 | */ | |
418 | void | |
419 | Idle_PTs_release(vm_offset_t start, vm_offset_t end) | |
420 | { | |
421 | uint32_t i; | |
422 | uint32_t index_start; | |
423 | uint32_t index_limit; | |
424 | ppnum_t pn_first; | |
425 | ppnum_t pn; | |
426 | uint32_t cnt; | |
427 | ||
428 | /* | |
429 | * Align start to the next large page boundary | |
430 | */ | |
431 | start = ((start + I386_LPGMASK) & ~I386_LPGMASK); | |
432 | ||
433 | /* | |
434 | * convert start into an index in KPTphys[] | |
435 | */ | |
436 | index_start = (uint32_t)((start - KERNEL_BASE) >> PAGE_SHIFT); | |
437 | ||
438 | /* | |
439 | * Find the ending index in KPTphys[] | |
440 | */ | |
441 | index_limit = (uint32_t)((end - KERNEL_BASE) >> PAGE_SHIFT); | |
442 | ||
443 | if (index_limit > NKPT * PTE_PER_PAGE) { | |
444 | index_limit = NKPT * PTE_PER_PAGE; | |
445 | } | |
446 | ||
447 | /* | |
448 | * Make sure all the 4K page tables are empty. | |
449 | * If not, panic a development/debug kernel. | |
450 | * On a production kernel, since this would stop us from booting, | |
451 | * just abort the operation. | |
452 | */ | |
453 | for (i = index_start; i < index_limit; ++i) { | |
454 | assert(KPTphys[i] == 0); | |
455 | if (KPTphys[i] != 0) { | |
456 | return; | |
457 | } | |
458 | } | |
459 | ||
460 | /* | |
461 | * Now figure out the indices into the 2nd level page tables, IdlePTD[]. | |
462 | */ | |
463 | index_start >>= PTPGSHIFT; | |
464 | index_limit >>= PTPGSHIFT; | |
465 | if (index_limit > NPGPTD * PTE_PER_PAGE) { | |
466 | index_limit = NPGPTD * PTE_PER_PAGE; | |
467 | } | |
468 | ||
469 | if (index_limit <= index_start) { | |
470 | return; | |
471 | } | |
472 | ||
473 | ||
474 | /* | |
475 | * Now check the pages referenced from Level 2 tables. | |
476 | * They should be contiguous, assert fail if not on development/debug. | |
477 | * In production, just fail the removal to allow the system to boot. | |
478 | */ | |
479 | pn_first = 0; | |
480 | cnt = 0; | |
481 | for (i = index_start; i < index_limit; ++i) { | |
482 | assert(IdlePTD[i] != 0); | |
483 | if (IdlePTD[i] == 0) { | |
484 | return; | |
485 | } | |
486 | ||
487 | pn = (ppnum_t)((PG_FRAME & IdlePTD[i]) >> PTSHIFT); | |
488 | if (cnt == 0) { | |
489 | pn_first = pn; | |
490 | } else { | |
491 | assert(pn == pn_first + cnt); | |
492 | if (pn != pn_first + cnt) { | |
493 | return; | |
494 | } | |
495 | } | |
496 | ++cnt; | |
497 | } | |
498 | ||
499 | /* | |
500 | * Good to go, clear the level 2 entries and invalidate the TLB | |
501 | */ | |
502 | for (i = index_start; i < index_limit; ++i) { | |
503 | IdlePTD[i] = 0; | |
504 | } | |
505 | set_cr3_raw(get_cr3_raw()); | |
506 | ||
507 | /* | |
508 | * Remember these PFNs to be released later in pmap_lowmem_finalize() | |
509 | */ | |
510 | released_PT_ppn = pn_first; | |
511 | released_PT_cnt = cnt; | |
512 | #if DEVELOPMENT || DEBUG | |
513 | printf("Idle_PTs_release %d pages from PFN 0x%x\n", released_PT_cnt, released_PT_ppn); | |
514 | #endif | |
515 | } | |
516 | ||
5ba3f43e A |
/* Common assembly entry point for all bootstrap trap vectors. */
extern void vstart_trap_handler;

/* One fake (pre-fix_desc64) 64-bit interrupt-gate descriptor for vector t. */
#define BOOT_TRAP_VECTOR(t) \
	[t] = { \
		(uintptr_t) &vstart_trap_handler, \
		KERNEL64_CS, \
		0, \
		ACC_P|ACC_PL_K|ACC_INTR_GATE, \
		0 \
	},

/* Recursive macro to iterate 0..31 (doubling expansion at each level) */
#define L0(x, n) x(n)
#define L1(x, n) L0(x,n-1) L0(x,n)
#define L2(x, n) L1(x,n-2) L1(x,n)
#define L3(x, n) L2(x,n-4) L2(x,n)
#define L4(x, n) L3(x,n-8) L3(x,n)
#define L5(x, n) L4(x,n-16) L4(x,n)
#define FOR_0_TO_31(x) L5(x,31)

/*
 * Bootstrap IDT. Active only during early startup.
 * Only the trap vectors are defined since interrupts are masked.
 * All traps point to a common handler.
 * Placed in the __HIB segment, page-aligned.
 */
struct fake_descriptor64 master_boot_idt64[IDTSZ]
__attribute__((section("__HIB,__desc")))
__attribute__((aligned(PAGE_SIZE))) = {
	FOR_0_TO_31(BOOT_TRAP_VECTOR)
};
547 | ||
548 | static void | |
549 | vstart_idt_init(void) | |
550 | { | |
0a7de745 A |
551 | x86_64_desc_register_t vstart_idt = { |
552 | sizeof(master_boot_idt64), | |
553 | master_boot_idt64 | |
554 | }; | |
555 | ||
5ba3f43e A |
556 | fix_desc64(master_boot_idt64, 32); |
557 | lidt((void *)&vstart_idt); | |
558 | } | |
b0d623f7 A |
559 | |
/*
 * vstart() is called in the natural mode (64bit for K64, 32 for K32)
 * on a set of bootstrap pagetables which use large, 2MB pages to map
 * all of physical memory in both. See idle_pt.c for details.
 *
 * In K64 this identity mapping is mirrored the top and bottom 512GB
 * slots of PML4.
 *
 * The bootstrap processor called with argument boot_args_start pointing to
 * the boot-args block. The kernel's (4K page) page tables are allocated and
 * initialized before switching to these.
 *
 * Non-bootstrap processors are called with argument boot_args_start NULL.
 * These processors switch immediately to the existing kernel page tables.
 */
__attribute__((noreturn))
void
vstart(vm_offset_t boot_args_start)
{
	boolean_t is_boot_cpu = !(boot_args_start == 0);
	int cpu = 0;
	uint32_t lphysfree;
#if DEBUG
	uint64_t gsbase;
#endif


	postcode(VSTART_ENTRY);

	if (is_boot_cpu) {
		/*
		 * Set-up temporary trap handlers during page-table set-up.
		 */
		vstart_idt_init();
		postcode(VSTART_IDT_INIT);

		/*
		 * Ensure that any %gs-relative access results in an immediate fault
		 * until gsbase is properly initialized below
		 */
		wrmsr64(MSR_IA32_GS_BASE, EARLY_GSBASE_MAGIC);

		/*
		 * Get startup parameters.
		 * physfree starts just past the loaded kernel, page-aligned up.
		 */
		kernelBootArgs = (boot_args *)boot_args_start;
		lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
		physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));

		pal_serial_init();

		DBG("revision 0x%x\n", kernelBootArgs->Revision);
		DBG("version 0x%x\n", kernelBootArgs->Version);
		DBG("command line %s\n", kernelBootArgs->CommandLine);
		DBG("memory map 0x%x\n", kernelBootArgs->MemoryMap);
		DBG("memory map sz 0x%x\n", kernelBootArgs->MemoryMapSize);
		DBG("kaddr 0x%x\n", kernelBootArgs->kaddr);
		DBG("ksize 0x%x\n", kernelBootArgs->ksize);
		DBG("physfree %p\n", physfree);
		DBG("bootargs: %p, &ksize: %p &kaddr: %p\n",
		    kernelBootArgs,
		    &kernelBootArgs->ksize,
		    &kernelBootArgs->kaddr);
		DBG("SMBIOS mem sz 0x%llx\n", kernelBootArgs->PhysicalMemorySize);

		/*
		 * Setup boot args given the physical start address.
		 * Note: PE_init_platform needs to be called before Idle_PTs_init
		 * because access to the DeviceTree is required to read the
		 * random seed before generating a random physical map slide.
		 */
		kernelBootArgs = (boot_args *)
		    ml_static_ptovirt(boot_args_start);
		DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
		    (unsigned long)boot_args_start, kernelBootArgs);

#if KASAN
		kasan_reserve_memory(kernelBootArgs);
#endif

		PE_init_platform(FALSE, kernelBootArgs);
		postcode(PE_INIT_PLATFORM_D);

		Idle_PTs_init();
		postcode(VSTART_IDLE_PTS_INIT);

#if KASAN
		/* Init kasan and map whatever was stolen from physfree */
		kasan_init();
		kasan_notify_stolen((uintptr_t)ml_static_ptovirt((vm_offset_t)physfree));
#endif

#if MONOTONIC
		mt_early_init();
#endif /* MONOTONIC */

		first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);

		cpu_data_alloc(TRUE);

		cpu_desc_init(cpu_datap(0));
		postcode(VSTART_CPU_DESC_INIT);
		cpu_desc_load(cpu_datap(0));

		postcode(VSTART_CPU_MODE_INIT);
		cpu_syscall_init(cpu_datap(0)); /* cpu_syscall_init() will be
		                                 * invoked on the APs
		                                 * via i386_init_slave()
		                                 */
	} else {
		/* Switch to kernel's page tables (from the Boot PTs) */
		set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
		/* Find our logical cpu number */
		cpu = lapic_to_cpu[(LAPIC_READ(ID) >> LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
#if DEBUG
		gsbase = rdmsr64(MSR_IA32_GS_BASE);
#endif
		cpu_desc_load(cpu_datap(cpu));
		/* gsbase is only captured (and DBG only expands) on DEBUG kernels. */
		DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, gsbase);
	}

	early_boot = 0;
	postcode(VSTART_EXIT);
	/* Tail-call onto the per-CPU interrupt stack; never returns. */
	x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
	    : (uintptr_t) i386_init_slave,
	    cpu_datap(cpu)->cpu_int_stack_top);
}
21362eb3 | 687 | |
fe8ab488 A |
/*
 * Intentionally empty. Called at several points in i386_init(); appears to
 * exist as an attach point for tracing/instrumentation — NOTE(review):
 * confirm intended use against other pstate_trace definitions in the tree.
 */
void
pstate_trace(void)
{
}
692 | ||
55e303ae A |
/*
 * Cpu initialization. Running virtual, but without MACH VM
 * set up.
 */
void
i386_init(void)
{
	unsigned int maxmem;
	uint64_t maxmemtouse;
	unsigned int cpus = 0;
	boolean_t fidn;
	boolean_t IA32e = TRUE;

	postcode(I386_INIT_ENTRY);

	pal_i386_init();
	tsc_init();
	rtclock_early_init();   /* mach_absolute_time() now functional */

	kernel_debug_string_early("i386_init");
	pstate_trace();

#if CONFIG_MCA
	/* Initialize machine-check handling */
	mca_cpu_init();
#endif

	master_cpu = 0;

	lck_mod_init();

	printf_init(); /* Init this in case we need debugger */

	/*
	 * Initialize the timer callout world
	 */
	timer_call_init();

	cpu_init();

	postcode(CPU_INIT_D);

	panic_init(); /* Init this in case we need debugger */

	/* setup debugging output if one has been chosen */
	kernel_debug_string_early("PE_init_kprintf");
	PE_init_kprintf(FALSE);

	kernel_debug_string_early("kernel_early_bootstrap");
	kernel_early_bootstrap();

	/* "diag" boot-arg sets diagnostic flags; defaults to 0 when absent. */
	if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof(dgWork.dgFlags))) {
		dgWork.dgFlags = 0;
	}

	serialmode = 0;
	if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
		/* We want a serial keyboard and/or console */
		kprintf("Serial mode specified: %08X\n", serialmode);
		int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
		if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
			if (force_sync) {
				serialmode |= SERIALMODE_SYNCDRAIN;
				kprintf(
					"WARNING: Forcing uart driver to output synchronously."
					"printf()s/IOLogs will impact kernel performance.\n"
					"You are advised to avoid using 'drain_uart_sync' boot-arg.\n");
			}
		}
	}
	if (serialmode & SERIALMODE_OUTPUT) {
		(void)switch_to_serial_console();
		disableConsoleOutput = FALSE; /* Allow printfs to happen */
	}

	/* setup console output */
	kernel_debug_string_early("PE_init_printf");
	PE_init_printf(FALSE);

	kprintf("version_variant = %s\n", version_variant);
	kprintf("version = %s\n", version);

	/* "maxmem" boot-arg is in MB; 0 means use all installed memory. */
	if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof(maxmem))) {
		maxmemtouse = 0;
	} else {
		maxmemtouse = ((uint64_t)maxmem) * MB;
	}

	/* "cpus" boot-arg caps the CPU count; accepted only if 0 < cpus < max. */
	if (PE_parse_boot_argn("cpus", &cpus, sizeof(cpus))) {
		if ((0 < cpus) && (cpus < max_ncpus)) {
			max_ncpus = cpus;
		}
	}

	/*
	 * debug support for > 4G systems
	 */
	PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof(vm_himemory_mode));
	if (!vm_himemory_mode) {
		kprintf("himemory_mode disabled\n");
	}

	if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof(fidn))) {
		force_immediate_debugger_NMI = FALSE;
	} else {
		force_immediate_debugger_NMI = fidn;
	}

#if DEBUG
	nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS, &urgency_notification_assert_abstime_threshold);
#endif
	PE_parse_boot_argn("urgency_notification_abstime",
	    &urgency_notification_assert_abstime_threshold,
	    sizeof(urgency_notification_assert_abstime_threshold));

	/* Disable NX enforcement if the CPU lacks the XD feature. */
	if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD)) {
		nx_enabled = 0;
	}

	/*
	 * VM initialization, after this we're using page tables...
	 * The maximum number of cpus must be set beforehand.
	 */
	kernel_debug_string_early("i386_vm_init");
	i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);

	/* create the console for verbose or pretty mode */
	/* Note: doing this prior to tsc_init() allows for graceful panic! */
	PE_init_platform(TRUE, kernelBootArgs);
	PE_create_console();

	kernel_debug_string_early("power_management_init");
	power_management_init();

#if MONOTONIC
	mt_cpu_up(cpu_datap(0));
#endif /* MONOTONIC */

	processor_bootstrap();
	thread_bootstrap();

	pstate_trace();
	kernel_debug_string_early("machine_startup");
	machine_startup();
	pstate_trace();
}
b0d623f7 | 839 | |
/*
 * Common bring-up path for a non-boot (slave) CPU.
 *
 * fast_restart == FALSE: full hardware initialization — enable caching,
 * program syscall MSRs, per-cpu pmap state, MCA, LAPIC, FPU, MTRRs and a
 * microcode update — then enter slave_main() with FULL_SLAVE_INIT.
 * fast_restart == TRUE: skip the hardware init and pass FAST_SLAVE_INIT
 * instead (CPU is coming back from a halt, state is assumed intact).
 *
 * Marked __dead2: slave_main() must not return; the trailing panic()
 * catches the impossible case.
 *
 * NOTE(review): the ordering of the steps below is hardware-mandated
 * (e.g. LAPIC_INIT before lapic_configure) — do not reorder.
 */
static void __dead2
do_init_slave(boolean_t fast_restart)
{
	void	*init_param = FULL_SLAVE_INIT;

	postcode(I386_INIT_SLAVE);

	if (!fast_restart) {
		/* Ensure that caching and write-through are enabled */
		set_cr0(get_cr0() & ~(CR0_NW | CR0_CD));

		DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
		    get_cpu_number(), get_cpu_phys_number());

		assert(!ml_get_interrupts_enabled());

		cpu_syscall_init(current_cpu_datap());
		pmap_cpu_init();

#if CONFIG_MCA
		/* machine-check architecture, per-cpu setup */
		mca_cpu_init();
#endif

		LAPIC_INIT();
		lapic_configure();
		LAPIC_DUMP();
		LAPIC_CPU_MAP_DUMP();

		init_fpu();

#if CONFIG_MTRR
		/* copy the boot cpu's MTRR settings onto this cpu */
		mtrr_update_cpu();
#endif
		/* update CPU microcode */
		ucode_update_wake();
	} else {
		init_param = FAST_SLAVE_INIT;
	}

#if CONFIG_VMX
	/* resume VT operation */
	vmx_resume(FALSE);
#endif

#if CONFIG_MTRR
	/* PAT is reprogrammed only on the full path, matching the MTRR init above */
	if (!fast_restart) {
		pat_init();
	}
#endif

	cpu_thread_init();	/* not strictly necessary */

	cpu_init();	/* Sets cpu_running which starter cpu waits for */


#if MONOTONIC
	mt_cpu_up(current_cpu_datap());
#endif /* MONOTONIC */

	/* Does not return */
	slave_main(init_param);

	panic("do_init_slave() returned from slave_main()");
}
903 | ||
904 | /* | |
905 | * i386_init_slave() is called from pstart. | |
906 | * We're in the cpu's interrupt stack with interrupts disabled. | |
907 | * At this point we are in legacy mode. We need to switch on IA32e | |
908 | * if the mode is set to 64-bits. | |
909 | */ | |
910 | void | |
911 | i386_init_slave(void) | |
912 | { | |
0a7de745 | 913 | do_init_slave(FALSE); |
b0d623f7 A |
914 | } |
915 | ||
916 | /* | |
917 | * i386_init_slave_fast() is called from pmCPUHalt. | |
918 | * We're running on the idle thread and need to fix up | |
919 | * some accounting and get it so that the scheduler sees this | |
920 | * CPU again. | |
921 | */ | |
922 | void | |
923 | i386_init_slave_fast(void) | |
924 | { | |
0a7de745 | 925 | do_init_slave(TRUE); |
b0d623f7 A |
926 | } |
927 | ||
#include <libkern/kernel_mach_header.h>

/* TODO: Evaluate global PTEs for the double-mapped translations */

/* Bounds of the double-mapped alias region; dblmap_max doubles as the
 * allocation cursor advanced by dyn_dblmap(). */
uint64_t dblmap_base, dblmap_max;
/* Mach-O segment descriptor for __HIB, cached by doublemap_init(). */
kernel_segment_command_t *hdescseg;

pt_entry_t *dblmapL3;		/* L3 page table backing the double map */
unsigned int dblallocs;		/* count of page-table pages allocated for the double map */
uint64_t dblmap_dist;		/* displacement from a kernel VA to its double-mapped alias */
extern uint64_t idt64_hndl_table0[];	/* trampoline handler table, defined outside this file */


/*
 * Build the "double map": an alias of the __HIB segment placed under a
 * dedicated PML4 slot, used by the low-level exception/interrupt
 * trampolines. randL3 selects which L3 (1GiB) slot within that PML4
 * entry anchors the alias — presumably a randomized value chosen by the
 * caller to decouple the alias base from the kernel slide (TODO confirm
 * at call site).
 *
 * Side effects: allocates page-table pages (counted in dblallocs),
 * installs IdlePML4[KERNEL_DBLMAP_PML4_INDEX], and sets dblmap_base,
 * dblmap_max, dblmap_dist and entries of idt64_hndl_table0.
 */
void
doublemap_init(uint8_t randL3)
{
	dblmapL3 = ALLOCPAGES(1); // for 512 1GiB entries
	dblallocs++;

	struct {
		pt_entry_t entries[PTE_PER_PAGE];
	} * dblmapL2 = ALLOCPAGES(1); // for 512 2MiB entries
	dblallocs++;

	/* Hook the single L2 table under the chosen (randomized) L3 slot */
	dblmapL3[randL3] = ((uintptr_t)ID_MAP_VTOP(&dblmapL2[0]))
	    | INTEL_PTE_VALID
	    | INTEL_PTE_WRITE;

	hdescseg = getsegbynamefromheader(&_mh_execute_header, "__HIB");

	/* Whole __HIB segment: base, size rounded to page granularity, cursor/end */
	vm_offset_t hdescb = hdescseg->vmaddr;
	unsigned long hdescsz = hdescseg->vmsize;
	unsigned long hdescszr = round_page_64(hdescsz);
	vm_offset_t hdescc = hdescb, hdesce = hdescb + hdescszr;

	/* __HIB,__text bounds — the only range mapped executable below */
	kernel_section_t *thdescsect = getsectbynamefromheader(&_mh_execute_header, "__HIB", "__text");
	vm_offset_t thdescb = thdescsect->addr;
	unsigned long thdescsz = thdescsect->size;
	unsigned long thdescszr = round_page_64(thdescsz);
	vm_offset_t thdesce = thdescb + thdescszr;

	assert((hdescb & 0xFFF) == 0);
	/* Mirror HIB translations into the double-mapped pagetable subtree:
	 * one L1 table (and one L2 entry) per 2MiB chunk of the segment. */
	for (int i = 0; hdescc < hdesce; i++) {
		struct {
			pt_entry_t entries[PTE_PER_PAGE];
		} * dblmapL1 = ALLOCPAGES(1);
		dblallocs++;
		dblmapL2[0].entries[i] = ((uintptr_t)ID_MAP_VTOP(&dblmapL1[0])) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
		/* pages remaining in the segment, caps this L1's fill below */
		int hdescn = (int) ((hdesce - hdescc) / PAGE_SIZE);
		for (int j = 0; j < MIN(PTE_PER_PAGE, hdescn); j++) {
			uint64_t template = INTEL_PTE_VALID;
			if ((hdescc >= thdescb) && (hdescc < thdesce)) {
				/* executable: text pages get neither W nor NX */
			} else {
				template |= INTEL_PTE_WRITE | INTEL_PTE_NX; /* Writeable, NX */
			}
			dblmapL1[0].entries[j] = ((uintptr_t)ID_MAP_VTOP(hdescc)) | template;
			hdescc += PAGE_SIZE;
		}
	}

	/* Publish the alias subtree under its dedicated PML4 slot */
	IdlePML4[KERNEL_DBLMAP_PML4_INDEX] = ((uintptr_t)ID_MAP_VTOP(dblmapL3)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;

	dblmap_base = KVADDR(KERNEL_DBLMAP_PML4_INDEX, randL3, 0, 0);
	dblmap_max = dblmap_base + hdescszr;
	/* Calculate the double-map distance, which accounts for the current
	 * KASLR slide
	 */

	dblmap_dist = dblmap_base - hdescb;
	idt64_hndl_table0[1] = DBLMAP(idt64_hndl_table0[1]); /* 64-bit exit trampoline */
	idt64_hndl_table0[3] = DBLMAP(idt64_hndl_table0[3]); /* 32-bit exit trampoline */
	idt64_hndl_table0[6] = (uint64_t)(uintptr_t)&kernel_stack_mask;

	extern cpu_data_t cpshadows[], scdatas[];
	uintptr_t cd1 = (uintptr_t) &cpshadows[0];
	uintptr_t cd2 = (uintptr_t) &scdatas[0];
	/* Record the displacement from the kernel's per-CPU data pointer, eventually
	 * programmed into GSBASE, to the "shadows" in the doublemapped
	 * region. These are not aliases, but separate physical allocations
	 * containing data required in the doublemapped trampolines.
	 */
	idt64_hndl_table0[2] = dblmap_dist + cd1 - cd2;

	DBG("Double map base: 0x%qx\n", dblmap_base);
	DBG("double map idlepml4[%d]: 0x%llx\n", KERNEL_DBLMAP_PML4_INDEX, IdlePML4[KERNEL_DBLMAP_PML4_INDEX]);
	/* NOTE(review): reason this requires strictly-greater rather than >= is
	 * not visible here — confirm against LDT sizing elsewhere. */
	assert(LDTSZ > LDTSZ_MIN);
}
1017 | ||
1018 | vm_offset_t dyn_dblmap(vm_offset_t, vm_offset_t); | |
1019 | ||
1020 | #include <i386/pmap_internal.h> | |
1021 | ||
1022 | /* Use of this routine is expected to be synchronized by callers | |
1023 | * Creates non-executable aliases. | |
1024 | */ | |
0a7de745 A |
1025 | vm_offset_t |
1026 | dyn_dblmap(vm_offset_t cva, vm_offset_t sz) | |
1027 | { | |
5c9f4661 A |
1028 | vm_offset_t ava = dblmap_max; |
1029 | ||
1030 | assert((sz & PAGE_MASK) == 0); | |
1031 | assert(cva != 0); | |
1032 | ||
1033 | pmap_alias(ava, cva, cva + sz, VM_PROT_READ | VM_PROT_WRITE, PMAP_EXPAND_OPTIONS_ALIASMAP); | |
1034 | dblmap_max += sz; | |
0a7de745 | 1035 | return ava - cva; |
5c9f4661 A |
1036 | } |
1037 | /* Adjust offsets interior to the bootstrap interrupt descriptor table to redirect | |
1038 | * control to the double-mapped interrupt vectors. The IDTR proper will be | |
1039 | * programmed via cpu_desc_load() | |
1040 | */ | |
0a7de745 A |
1041 | void |
1042 | idt64_remap(void) | |
1043 | { | |
5c9f4661 A |
1044 | for (int i = 0; i < IDTSZ; i++) { |
1045 | master_idt64[i].offset64 = DBLMAP(master_idt64[i].offset64); | |
1046 | } | |
1047 | } |