osfmk/i386/i386_init.c (apple/xnu, tag xnu-4903.270.47)
1 /*
2 * Copyright (c) 2003-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57
58 #include <mach/i386/vm_param.h>
59
60 #include <string.h>
61 #include <stdint.h>
62 #include <mach/vm_param.h>
63 #include <mach/vm_prot.h>
64 #include <mach/machine.h>
65 #include <mach/time_value.h>
66 #include <kern/spl.h>
67 #include <kern/assert.h>
68 #include <kern/debug.h>
69 #include <kern/misc_protos.h>
70 #include <kern/startup.h>
71 #include <kern/clock.h>
72 #include <kern/pms.h>
73 #include <kern/xpr.h>
74 #include <kern/cpu_data.h>
75 #include <kern/processor.h>
76 #include <sys/kdebug.h>
77 #include <console/serial_protos.h>
78 #include <vm/vm_page.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_kern.h>
81 #include <machine/pal_routines.h>
82 #include <i386/fpu.h>
83 #include <i386/pmap.h>
84 #include <i386/misc_protos.h>
85 #include <i386/cpu_threads.h>
86 #include <i386/cpuid.h>
87 #include <i386/lapic.h>
88 #include <i386/mp.h>
89 #include <i386/mp_desc.h>
90 #if CONFIG_MTRR
91 #include <i386/mtrr.h>
92 #endif
93 #include <i386/machine_routines.h>
94 #if CONFIG_MCA
95 #include <i386/machine_check.h>
96 #endif
97 #include <i386/ucode.h>
98 #include <i386/postcode.h>
99 #include <i386/Diagnostics.h>
100 #include <i386/pmCPU.h>
101 #include <i386/tsc.h>
102 #include <i386/locks.h> /* LcksOpts */
103 #if DEBUG
104 #include <machine/pal_routines.h>
105 #endif
106
107 #if MONOTONIC
108 #include <kern/monotonic.h>
109 #endif /* MONOTONIC */
110
111 #include <san/kasan.h>
112
113 #if DEBUG
114 #define DBG(x ...) kprintf(x)
115 #else
116 #define DBG(x ...)
117 #endif
118
119 int debug_task;
120
121 int early_boot = 1;
122
123 static boot_args *kernelBootArgs;
124
125 extern int disableConsoleOutput;
126 extern const char version[];
127 extern const char version_variant[];
128 extern int nx_enabled;
129
130 /*
131 * Set initial values so that ml_phys_* routines can use the booter's ID mapping
132 * to touch physical space before the kernel's physical aperture exists.
133 */
134 uint64_t physmap_base = 0;
135 uint64_t physmap_max = 4 * GB;
136
137 pd_entry_t *KPTphys;
138 pd_entry_t *IdlePTD;
139 pdpt_entry_t *IdlePDPT;
140 pml4_entry_t *IdlePML4;
141
142 int kernPhysPML4Index;
143 int kernPhysPML4EntryCount;
144
145 int allow_64bit_proc_LDT_ops;
146
147 char *physfree;
148 void idt64_remap(void);
149
150 /*
151 * Note: ALLOCPAGES() can only be used safely within Idle_PTs_init()
152 * due to the mutation of physfree.
153 */
154 static void *
155 ALLOCPAGES(int npages)
156 {
157 uintptr_t tmp = (uintptr_t)physfree;
158 bzero(physfree, npages * PAGE_SIZE);
159 physfree += npages * PAGE_SIZE;
160 tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK;
161 return (void *)tmp;
162 }
163
164 static void
165 fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count)
166 {
167 int i;
168 for (i = 0; i < count; i++) {
169 base[index] = src | prot | INTEL_PTE_VALID;
170 src += PAGE_SIZE;
171 index++;
172 }
173 }
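/*
 * fillkpt() writes 'count' consecutive entries into the table at 'base',
 * starting at 'index': each entry maps one 4K page beginning at physical
 * address 'src' with the supplied protection bits plus INTEL_PTE_VALID.
 * A hypothetical call such as
 *     fillkpt(KPTphys, INTEL_PTE_WRITE, 0, 0, 4);
 * would fill the first four slots of KPTphys with writable mappings of
 * physical pages 0x0000 through 0x3FFF.
 */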
174
175 extern pmap_paddr_t first_avail;
176
177 int break_kprintf = 0;
178
179 uint64_t
180 x86_64_pre_sleep(void)
181 {
182 IdlePML4[0] = IdlePML4[KERNEL_PML4_INDEX];
183 uint64_t oldcr3 = get_cr3_raw();
184 set_cr3_raw((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4));
185 return oldcr3;
186 }
187
188 void
189 x86_64_post_sleep(uint64_t new_cr3)
190 {
191 IdlePML4[0] = 0;
192 set_cr3_raw((uint32_t) new_cr3);
193 }
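/*
 * Reading of the sleep/wake helpers above (descriptive, not authoritative):
 * x86_64_pre_sleep() aliases PML4 slot 0 to the kernel slot so the kernel
 * mappings are also visible through low addresses, saves the current CR3,
 * and switches to the bootstrap IdlePML4 via its identity-mapped physical
 * address; x86_64_post_sleep() clears the slot-0 alias and reloads the CR3
 * value handed back to it.
 */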
194
195
196
197
198 // Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address
199 // NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account for
200 // the PCI hole (which is less than 4GB but not more).
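/*
 * Rough shape of the mapping built below (illustrative): each PML4 slot in
 * the physmap range points at an L3 (PDPT) page, each populated L3 entry
 * points at an L2 page of 512 2MB (NBPD) large-page mappings, so an L2 page
 * covers 1GB of physical space and a fully populated L3 page covers 512GB.
 * The starting L3 slot is offset by the 8-bit entropy value passed to
 * physmap_init(), which slides the base of the window.
 */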
201
202 static int
203 physmap_init_L2(uint64_t *physStart, pt_entry_t **l2ptep)
204 {
205 unsigned i;
206 pt_entry_t *physmapL2 = ALLOCPAGES(1);
207
208 if (physmapL2 == NULL) {
209 DBG("physmap_init_L2 page alloc failed when initting L2 for physAddr 0x%llx.\n", *physStart);
210 *l2ptep = NULL;
211 return -1;
212 }
213
214 for (i = 0; i < NPDPG; i++) {
215 physmapL2[i] = *physStart
216 | INTEL_PTE_PS
217 | INTEL_PTE_VALID
218 | INTEL_PTE_NX
219 | INTEL_PTE_WRITE;
220
221 *physStart += NBPD;
222 }
223 *l2ptep = physmapL2;
224 return 0;
225 }
226
227 static int
228 physmap_init_L3(int startIndex, uint64_t highest_phys, uint64_t *physStart, pt_entry_t **l3ptep)
229 {
230 unsigned i;
231 int ret;
232 pt_entry_t *l2pte;
233 pt_entry_t *physmapL3 = ALLOCPAGES(1); /* ALLOCPAGES bzeroes the memory */
234
235 if (physmapL3 == NULL) {
236 DBG("physmap_init_L3 page alloc failed when initting L3 for physAddr 0x%llx.\n", *physStart);
237 *l3ptep = NULL;
238 return -1;
239 }
240
241 for (i = startIndex; i < NPDPTPG && *physStart < highest_phys; i++) {
242 if ((ret = physmap_init_L2(physStart, &l2pte)) < 0) {
243 return ret;
244 }
245
246 physmapL3[i] = ((uintptr_t)ID_MAP_VTOP(l2pte))
247 | INTEL_PTE_VALID
248 | INTEL_PTE_NX
249 | INTEL_PTE_WRITE;
250 }
251
252 *l3ptep = physmapL3;
253
254 return 0;
255 }
256
257 static void
258 physmap_init(uint8_t phys_random_L3)
259 {
260 pt_entry_t *l3pte;
261 int pml4_index, i;
262 int L3_start_index;
263 uint64_t physAddr = 0;
264 uint64_t highest_physaddr;
265 unsigned pdpte_count;
266
267 #if DEVELOPMENT || DEBUG
268 if (kernelBootArgs->PhysicalMemorySize > K64_MAXMEM) {
269 panic("Installed physical memory exceeds configured maximum.");
270 }
271 #endif
272
273 /*
274 * Add 4GB to the loader-provided physical memory size to account for MMIO space
275 * XXX in a perfect world, we'd scan PCI buses and count the max memory requested in BARs by
276 * XXX all enumerated devices, then add more for hot-pluggable devices.
277 */
278 highest_physaddr = kernelBootArgs->PhysicalMemorySize + 4 * GB;
279
280 /*
281 * Calculate the number of PML4 entries we'll need. The total number of entries is
282 * pdpte_count = (((highest_physaddr) >> PDPTSHIFT) + entropy_value +
283 * ((highest_physaddr & PDPTMASK) == 0 ? 0 : 1))
284 * pml4e_count = pdpte_count >> (PML4SHIFT - PDPTSHIFT), rounded up
285 */
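/*
 * Worked example with hypothetical numbers: if the loader reports 32GB of
 * RAM, highest_physaddr = 36GB, so (36GB + PDPTMASK) >> PDPTSHIFT yields
 * 36 1GB PDPT entries; with an entropy value of, say, 5, pdpte_count = 41,
 * and since 512 PDPT entries share one PML4 entry the rounded-up
 * kernPhysPML4EntryCount is 1.
 */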
286 assert(highest_physaddr < (UINT64_MAX - PDPTMASK));
287 pdpte_count = (unsigned) (((highest_physaddr + PDPTMASK) >> PDPTSHIFT) + phys_random_L3);
288 kernPhysPML4EntryCount = (pdpte_count + ((1U << (PML4SHIFT - PDPTSHIFT)) - 1)) >> (PML4SHIFT - PDPTSHIFT);
289 if (kernPhysPML4EntryCount == 0) {
290 kernPhysPML4EntryCount = 1;
291 }
292 if (kernPhysPML4EntryCount > KERNEL_PHYSMAP_PML4_COUNT_MAX) {
293 #if DEVELOPMENT || DEBUG
294 panic("physmap too large");
295 #else
296 kprintf("[pmap] Limiting physmap to %d PML4s (was %d)\n", KERNEL_PHYSMAP_PML4_COUNT_MAX,
297 kernPhysPML4EntryCount);
298 kernPhysPML4EntryCount = KERNEL_PHYSMAP_PML4_COUNT_MAX;
299 #endif
300 }
301
302 kernPhysPML4Index = KERNEL_KEXTS_INDEX - kernPhysPML4EntryCount; /* used to be: KERNEL_PHYSMAP_PML4_INDEX */
303
304 /*
305 * XXX: Make sure that the addresses returned for physmapL3 and physmapL2 plus their extents
306 * are in the system-available memory range
307 */
308
309
310 /* We assume NX support. Mark all levels of the PHYSMAP NX
311 * to avoid granting executability via a single bit flip.
312 */
313 #if DEVELOPMENT || DEBUG
314 uint32_t reg[4];
315 do_cpuid(0x80000000, reg);
316 if (reg[eax] >= 0x80000001) {
317 do_cpuid(0x80000001, reg);
318 assert(reg[edx] & CPUID_EXTFEATURE_XD);
319 }
320 #endif /* DEVELOPMENT || DEBUG */
321
322 L3_start_index = phys_random_L3;
323
324 for (pml4_index = kernPhysPML4Index;
325 pml4_index < (kernPhysPML4Index + kernPhysPML4EntryCount) && physAddr < highest_physaddr;
326 pml4_index++) {
327 if (physmap_init_L3(L3_start_index, highest_physaddr, &physAddr, &l3pte) < 0) {
328 panic("Physmap page table initialization failed");
329 /* NOTREACHED */
330 }
331
332 L3_start_index = 0;
333
334 IdlePML4[pml4_index] = ((uintptr_t)ID_MAP_VTOP(l3pte))
335 | INTEL_PTE_VALID
336 | INTEL_PTE_NX
337 | INTEL_PTE_WRITE;
338 }
339
340 physmap_base = KVADDR(kernPhysPML4Index, phys_random_L3, 0, 0);
341 /*
342 * physAddr contains the last-mapped physical address, so that's what we
343 * add to physmap_base to derive the ending VA for the physmap.
344 */
345 physmap_max = physmap_base + physAddr;
346
347 DBG("Physical address map base: 0x%qx\n", physmap_base);
348 for (i = kernPhysPML4Index; i < (kernPhysPML4Index + kernPhysPML4EntryCount); i++) {
349 DBG("Physical map idlepml4[%d]: 0x%llx\n", i, IdlePML4[i]);
350 }
351 }
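/*
 * Illustrative summary of the window just built: physmap_base is the kernel
 * virtual address formed by KVADDR(kernPhysPML4Index, phys_random_L3, 0, 0),
 * so a physical address P below the mapped limit is reachable at
 * physmap_base + P, and physmap_max marks the first VA past the mapping.
 */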
352
353 void doublemap_init(uint8_t);
354
355 static void
356 Idle_PTs_init(void)
357 {
358 uint64_t rand64;
359
360 /* Allocate the "idle" kernel page tables: */
361 KPTphys = ALLOCPAGES(NKPT); /* level 1 */
362 IdlePTD = ALLOCPAGES(NPGPTD); /* level 2 */
363 IdlePDPT = ALLOCPAGES(1); /* level 3 */
364 IdlePML4 = ALLOCPAGES(1); /* level 4 */
365
366 // Fill the lowest level with everything up to physfree
367 fillkpt(KPTphys,
368 INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));
369
370 /* IdlePTD */
371 fillkpt(IdlePTD,
372 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);
373
374 // IdlePDPT entries
375 fillkpt(IdlePDPT,
376 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);
377
378 // IdlePML4 single entry for kernel space.
379 fillkpt(IdlePML4 + KERNEL_PML4_INDEX,
380 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePDPT), 0, 1);
381
382 postcode(VSTART_PHYSMAP_INIT);
383
384 /*
385 * early_random() cannot be called more than one time before the cpu's
386 * gsbase is initialized, so use the full 64-bit value to extract the
387 * two 8-bit entropy values needed for address randomization.
388 */
389 rand64 = early_random();
390 physmap_init(rand64 & 0xFF);
391 doublemap_init((rand64 >> 8) & 0xFF);
392 idt64_remap();
393
394 postcode(VSTART_SET_CR3);
395
396 // Switch to the page tables..
397 set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
398 }
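/*
 * Sketch of the hierarchy assembled above, bottom up: KPTphys holds 4K PTEs
 * covering physical memory up to physfree; the first NKPT IdlePTD entries
 * point at those PTE pages; the first NPGPTD IdlePDPT entries point at
 * IdlePTD; and a single IdlePML4 entry at KERNEL_PML4_INDEX points at
 * IdlePDPT. The physmap, the doublemap and the remapped IDT are then
 * layered on before CR3 is switched to IdlePML4.
 */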
399
400 extern void vstart_trap_handler;
401
402 #define BOOT_TRAP_VECTOR(t) \
403 [t] = { \
404 (uintptr_t) &vstart_trap_handler, \
405 KERNEL64_CS, \
406 0, \
407 ACC_P|ACC_PL_K|ACC_INTR_GATE, \
408 0 \
409 },
410
411 /* Recursive macro to iterate 0..31 */
412 #define L0(x, n) x(n)
413 #define L1(x, n) L0(x,n-1) L0(x,n)
414 #define L2(x, n) L1(x,n-2) L1(x,n)
415 #define L3(x, n) L2(x,n-4) L2(x,n)
416 #define L4(x, n) L3(x,n-8) L3(x,n)
417 #define L5(x, n) L4(x,n-16) L4(x,n)
418 #define FOR_0_TO_31(x) L5(x,31)
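/*
 * Expansion example (illustrative): FOR_0_TO_31(BOOT_TRAP_VECTOR) unfolds
 * as L5(x,31) -> L4(x,15) L4(x,31) -> ... until every leaf L0(x,n) becomes
 * BOOT_TRAP_VECTOR(n), emitting one designated initializer for each of
 * vectors 0 through 31 in order.
 */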
419
420 /*
421 * Bootstrap IDT. Active only during early startup.
422 * Only the trap vectors are defined since interrupts are masked.
423 * All traps point to a common handler.
424 */
425 struct fake_descriptor64 master_boot_idt64[IDTSZ]
426 __attribute__((section("__HIB,__desc")))
427 __attribute__((aligned(PAGE_SIZE))) = {
428 FOR_0_TO_31(BOOT_TRAP_VECTOR)
429 };
430
431 static void
432 vstart_idt_init(void)
433 {
434 x86_64_desc_register_t vstart_idt = {
435 sizeof(master_boot_idt64),
436 master_boot_idt64
437 };
438
439 fix_desc64(master_boot_idt64, 32);
440 lidt((void *)&vstart_idt);
441 }
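/*
 * Descriptive note on the sequence above: the 32 boot gates are declared in
 * the compact fake_descriptor64 form, fix_desc64() rewrites them in place
 * into the hardware descriptor layout, and lidt() then points the CPU at
 * master_boot_idt64 for the remainder of early page-table setup.
 */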
442
443 /*
444 * vstart() is called in the natural mode (64bit for K64, 32 for K32)
445 * on a set of bootstrap pagetables which use large, 2MB pages to map
446 * all of physical memory in both. See idle_pt.c for details.
447 *
448 * In K64 this identity mapping is mirrored in the top and bottom 512GB
449 * slots of PML4.
450 *
451 * The bootstrap processor is called with argument boot_args_start pointing to
452 * the boot-args block. The kernel's (4K page) page tables are allocated and
453 * initialized before switching to these.
454 *
455 * Non-bootstrap processors are called with argument boot_args_start NULL.
456 * These processors switch immediately to the existing kernel page tables.
457 */
458 __attribute__((noreturn))
459 void
460 vstart(vm_offset_t boot_args_start)
461 {
462 boolean_t is_boot_cpu = !(boot_args_start == 0);
463 int cpu = 0;
464 uint32_t lphysfree;
465
466 postcode(VSTART_ENTRY);
467
468 if (is_boot_cpu) {
469 /*
470 * Set-up temporary trap handlers during page-table set-up.
471 */
472 vstart_idt_init();
473 postcode(VSTART_IDT_INIT);
474
475 /*
476 * Ensure that any %gs-relative access results in an immediate fault
477 * until gsbase is properly initialized below
478 */
479 wrmsr64(MSR_IA32_GS_BASE, EARLY_GSBASE_MAGIC);
480
481 /*
482 * Get startup parameters.
483 */
484 kernelBootArgs = (boot_args *)boot_args_start;
485 lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
486 physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
487
488 #if DEVELOPMENT || DEBUG
489 pal_serial_init();
490 #endif
491 DBG("revision 0x%x\n", kernelBootArgs->Revision);
492 DBG("version 0x%x\n", kernelBootArgs->Version);
493 DBG("command line %s\n", kernelBootArgs->CommandLine);
494 DBG("memory map 0x%x\n", kernelBootArgs->MemoryMap);
495 DBG("memory map sz 0x%x\n", kernelBootArgs->MemoryMapSize);
496 DBG("kaddr 0x%x\n", kernelBootArgs->kaddr);
497 DBG("ksize 0x%x\n", kernelBootArgs->ksize);
498 DBG("physfree %p\n", physfree);
499 DBG("bootargs: %p, &ksize: %p &kaddr: %p\n",
500 kernelBootArgs,
501 &kernelBootArgs->ksize,
502 &kernelBootArgs->kaddr);
503 DBG("SMBIOS mem sz 0x%llx\n", kernelBootArgs->PhysicalMemorySize);
504
505 /*
506 * Setup boot args given the physical start address.
507 * Note: PE_init_platform needs to be called before Idle_PTs_init
508 * because access to the DeviceTree is required to read the
509 * random seed before generating a random physical map slide.
510 */
511 kernelBootArgs = (boot_args *)
512 ml_static_ptovirt(boot_args_start);
513 DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
514 (unsigned long)boot_args_start, kernelBootArgs);
515
516 #if KASAN
517 kasan_reserve_memory(kernelBootArgs);
518 #endif
519
520 PE_init_platform(FALSE, kernelBootArgs);
521 postcode(PE_INIT_PLATFORM_D);
522
523 Idle_PTs_init();
524 postcode(VSTART_IDLE_PTS_INIT);
525
526 #if KASAN
527 /* Init kasan and map whatever was stolen from physfree */
528 kasan_init();
529 kasan_notify_stolen((uintptr_t)ml_static_ptovirt((vm_offset_t)physfree));
530 #endif
531
532 #if MONOTONIC
533 mt_early_init();
534 #endif /* MONOTONIC */
535
536 first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);
537
538 cpu_data_alloc(TRUE);
539
540 cpu_desc_init(cpu_datap(0));
541 postcode(VSTART_CPU_DESC_INIT);
542 cpu_desc_load(cpu_datap(0));
543
544 postcode(VSTART_CPU_MODE_INIT);
545 cpu_syscall_init(cpu_datap(0)); /* cpu_syscall_init() will be
546 * invoked on the APs
547 * via i386_init_slave()
548 */
549 } else {
550 /* Switch to kernel's page tables (from the Boot PTs) */
551 set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
552 /* Find our logical cpu number */
553 cpu = lapic_to_cpu[(LAPIC_READ(ID) >> LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
554 DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
555 cpu_desc_load(cpu_datap(cpu));
556 }
557
558 early_boot = 0;
559 postcode(VSTART_EXIT);
560 x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
561 : (uintptr_t) i386_init_slave,
562 cpu_datap(cpu)->cpu_int_stack_top);
563 }
564
565 void
566 pstate_trace(void)
567 {
568 }
569
570 /*
571 * Cpu initialization. Running virtual, but without MACH VM
572 * set up.
573 */
574 void
575 i386_init(void)
576 {
577 unsigned int maxmem;
578 uint64_t maxmemtouse;
579 unsigned int cpus = 0;
580 boolean_t fidn;
581 boolean_t IA32e = TRUE;
582
583 postcode(I386_INIT_ENTRY);
584
585 pal_i386_init();
586 tsc_init();
587 rtclock_early_init(); /* mach_absolute_time() now functional */
588
589 kernel_debug_string_early("i386_init");
590 pstate_trace();
591
592 #if CONFIG_MCA
593 /* Initialize machine-check handling */
594 mca_cpu_init();
595 #endif
596
597 master_cpu = 0;
598 cpu_init();
599
600 postcode(CPU_INIT_D);
601
602 printf_init(); /* Init this in case we need debugger */
603 panic_init(); /* Init this in case we need debugger */
604
605 /* setup debugging output if one has been chosen */
606 kernel_debug_string_early("PE_init_kprintf");
607 PE_init_kprintf(FALSE);
608
609 kernel_debug_string_early("kernel_early_bootstrap");
610 kernel_early_bootstrap();
611
612 if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof(dgWork.dgFlags))) {
613 dgWork.dgFlags = 0;
614 }
615
616 if (!PE_parse_boot_argn("ldt64", &allow_64bit_proc_LDT_ops,
617 sizeof(allow_64bit_proc_LDT_ops))) {
618 allow_64bit_proc_LDT_ops = 0;
619 }
620
621 serialmode = 0;
622 if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
623 /* We want a serial keyboard and/or console */
624 kprintf("Serial mode specified: %08X\n", serialmode);
625 int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
626 if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
627 if (force_sync) {
628 serialmode |= SERIALMODE_SYNCDRAIN;
629 kprintf(
630 "WARNING: Forcing uart driver to output synchronously."
631 "printf()s/IOLogs will impact kernel performance.\n"
632 "You are advised to avoid using 'drain_uart_sync' boot-arg.\n");
633 }
634 }
635 }
636 if (serialmode & SERIALMODE_OUTPUT) {
637 (void)switch_to_serial_console();
638 disableConsoleOutput = FALSE; /* Allow printfs to happen */
639 }
640
641 /* setup console output */
642 kernel_debug_string_early("PE_init_printf");
643 PE_init_printf(FALSE);
644
645 kprintf("version_variant = %s\n", version_variant);
646 kprintf("version = %s\n", version);
647
648 if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof(maxmem))) {
649 maxmemtouse = 0;
650 } else {
651 maxmemtouse = ((uint64_t)maxmem) * MB;
652 }
653
654 if (PE_parse_boot_argn("cpus", &cpus, sizeof(cpus))) {
655 if ((0 < cpus) && (cpus < max_ncpus)) {
656 max_ncpus = cpus;
657 }
658 }
659
660 /*
661 * debug support for > 4G systems
662 */
663 PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof(vm_himemory_mode));
664 if (!vm_himemory_mode) {
665 kprintf("himemory_mode disabled\n");
666 }
667
668 if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof(fidn))) {
669 force_immediate_debugger_NMI = FALSE;
670 } else {
671 force_immediate_debugger_NMI = fidn;
672 }
673
674 #if DEBUG
675 nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS, &urgency_notification_assert_abstime_threshold);
676 #endif
677 PE_parse_boot_argn("urgency_notification_abstime",
678 &urgency_notification_assert_abstime_threshold,
679 sizeof(urgency_notification_assert_abstime_threshold));
680
681 if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD)) {
682 nx_enabled = 0;
683 }
684
685 /*
686 * VM initialization, after this we're using page tables...
687 * The maximum number of cpus must be set beforehand.
688 */
689 kernel_debug_string_early("i386_vm_init");
690 i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);
691
692 /* create the console for verbose or pretty mode */
693 /* Note: doing this prior to tsc_init() allows for graceful panic! */
694 PE_init_platform(TRUE, kernelBootArgs);
695 PE_create_console();
696
697 kernel_debug_string_early("power_management_init");
698 power_management_init();
699 processor_bootstrap();
700 thread_bootstrap();
701
702 pstate_trace();
703 kernel_debug_string_early("machine_startup");
704 machine_startup();
705 pstate_trace();
706 }
707
708 static void
709 do_init_slave(boolean_t fast_restart)
710 {
711 void *init_param = FULL_SLAVE_INIT;
712
713 postcode(I386_INIT_SLAVE);
714
715 if (!fast_restart) {
716 /* Ensure that caching and write-through are enabled */
717 set_cr0(get_cr0() & ~(CR0_NW | CR0_CD));
718
719 DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
720 get_cpu_number(), get_cpu_phys_number());
721
722 assert(!ml_get_interrupts_enabled());
723
724 cpu_syscall_init(current_cpu_datap());
725 pmap_cpu_init();
726
727 #if CONFIG_MCA
728 mca_cpu_init();
729 #endif
730
731 LAPIC_INIT();
732 lapic_configure();
733 LAPIC_DUMP();
734 LAPIC_CPU_MAP_DUMP();
735
736 init_fpu();
737
738 #if CONFIG_MTRR
739 mtrr_update_cpu();
740 #endif
741 /* update CPU microcode */
742 ucode_update_wake();
743
744 /* Do CPU workarounds after the microcode update */
745 cpuid_do_was();
746 } else {
747 init_param = FAST_SLAVE_INIT;
748 }
749
750 #if CONFIG_VMX
751 /* resume VT operation */
752 vmx_resume(FALSE);
753 #endif
754
755 #if CONFIG_MTRR
756 if (!fast_restart) {
757 pat_init();
758 }
759 #endif
760
761 cpu_thread_init(); /* not strictly necessary */
762
763 cpu_init(); /* Sets cpu_running which starter cpu waits for */
764 slave_main(init_param);
765
766 panic("do_init_slave() returned from slave_main()");
767 }
768
769 /*
770 * i386_init_slave() is called from pstart.
771 * We're in the cpu's interrupt stack with interrupts disabled.
772 * At this point we are in legacy mode. We need to switch on IA32e
773 * if the mode is set to 64-bits.
774 */
775 void
776 i386_init_slave(void)
777 {
778 do_init_slave(FALSE);
779 }
780
781 /*
782 * i386_init_slave_fast() is called from pmCPUHalt.
783 * We're running on the idle thread and need to fix up
784 * some accounting and get it so that the scheduler sees this
785 * CPU again.
786 */
787 void
788 i386_init_slave_fast(void)
789 {
790 do_init_slave(TRUE);
791 }
792
793 #include <libkern/kernel_mach_header.h>
794
795 /* TODO: Evaluate global PTEs for the double-mapped translations */
796
797 uint64_t dblmap_base, dblmap_max;
798 kernel_segment_command_t *hdescseg;
799
800 pt_entry_t *dblmapL3;
801 unsigned int dblallocs;
802 uint64_t dblmap_dist;
803 extern uint64_t idt64_hndl_table0[];
804
805
806 void
807 doublemap_init(uint8_t randL3)
808 {
809 dblmapL3 = ALLOCPAGES(1); // for 512 1GiB entries
810 dblallocs++;
811
812 struct {
813 pt_entry_t entries[PTE_PER_PAGE];
814 } * dblmapL2 = ALLOCPAGES(1); // for 512 2MiB entries
815 dblallocs++;
816
817 dblmapL3[randL3] = ((uintptr_t)ID_MAP_VTOP(&dblmapL2[0]))
818 | INTEL_PTE_VALID
819 | INTEL_PTE_WRITE;
820
821 hdescseg = getsegbynamefromheader(&_mh_execute_header, "__HIB");
822
823 vm_offset_t hdescb = hdescseg->vmaddr;
824 unsigned long hdescsz = hdescseg->vmsize;
825 unsigned long hdescszr = round_page_64(hdescsz);
826 vm_offset_t hdescc = hdescb, hdesce = hdescb + hdescszr;
827
828 kernel_section_t *thdescsect = getsectbynamefromheader(&_mh_execute_header, "__HIB", "__text");
829 vm_offset_t thdescb = thdescsect->addr;
830 unsigned long thdescsz = thdescsect->size;
831 unsigned long thdescszr = round_page_64(thdescsz);
832 vm_offset_t thdesce = thdescb + thdescszr;
833
834 assert((hdescb & 0xFFF) == 0);
835 /* Mirror HIB translations into the double-mapped pagetable subtree */
836 for (int i = 0; hdescc < hdesce; i++) {
837 struct {
838 pt_entry_t entries[PTE_PER_PAGE];
839 } * dblmapL1 = ALLOCPAGES(1);
840 dblallocs++;
841 dblmapL2[0].entries[i] = ((uintptr_t)ID_MAP_VTOP(&dblmapL1[0])) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
842 int hdescn = (int) ((hdesce - hdescc) / PAGE_SIZE);
843 for (int j = 0; j < MIN(PTE_PER_PAGE, hdescn); j++) {
844 uint64_t template = INTEL_PTE_VALID;
845 if ((hdescc >= thdescb) && (hdescc < thdesce)) {
846 /* executable */
847 } else {
848 template |= INTEL_PTE_WRITE | INTEL_PTE_NX; /* Writeable, NX */
849 }
850 dblmapL1[0].entries[j] = ((uintptr_t)ID_MAP_VTOP(hdescc)) | template;
851 hdescc += PAGE_SIZE;
852 }
853 }
854
855 IdlePML4[KERNEL_DBLMAP_PML4_INDEX] = ((uintptr_t)ID_MAP_VTOP(dblmapL3)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
856
857 dblmap_base = KVADDR(KERNEL_DBLMAP_PML4_INDEX, randL3, 0, 0);
858 dblmap_max = dblmap_base + hdescszr;
859 /* Calculate the double-map distance, which accounts for the current
860 * KASLR slide
861 */
862
863 dblmap_dist = dblmap_base - hdescb;
864 idt64_hndl_table0[1] = DBLMAP(idt64_hndl_table0[1]);
865 idt64_hndl_table0[6] = (uint64_t)(uintptr_t)&kernel_stack_mask;
866
867 extern cpu_data_t cpshadows[], scdatas[];
868 uintptr_t cd1 = (uintptr_t) &cpshadows[0];
869 uintptr_t cd2 = (uintptr_t) &scdatas[0];
870 /* Record the displacement from the kernel's per-CPU data pointer, eventually
871 * programmed into GSBASE, to the "shadows" in the doublemapped
872 * region. These are not aliases, but separate physical allocations
873 * containing data required in the doublemapped trampolines.
874 */
875 idt64_hndl_table0[2] = dblmap_dist + cd1 - cd2;
876
877 DBG("Double map base: 0x%qx\n", dblmap_base);
878 DBG("double map idlepml4[%d]: 0x%llx\n", KERNEL_DBLMAP_PML4_INDEX, IdlePML4[KERNEL_DBLMAP_PML4_INDEX]);
879 assert(LDTSZ > LDTSZ_MIN);
880 }
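/*
 * Illustrative note: dblmap_dist is the constant displacement between a
 * __HIB address and its alias in the doublemapped region, so the DBLMAP()
 * macro used above can be read as adding dblmap_dist to its argument; for
 * example, the handler address in idt64_hndl_table0[1] is rewritten to its
 * doublemapped alias.
 */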
881
882 vm_offset_t dyn_dblmap(vm_offset_t, vm_offset_t);
883
884 #include <i386/pmap_internal.h>
885
886 /* Use of this routine is expected to be synchronized by callers.
887 * Creates non-executable aliases.
888 */
889 vm_offset_t
890 dyn_dblmap(vm_offset_t cva, vm_offset_t sz)
891 {
892 vm_offset_t ava = dblmap_max;
893
894 assert((sz & PAGE_MASK) == 0);
895 assert(cva != 0);
896
897 pmap_alias(ava, cva, cva + sz, VM_PROT_READ | VM_PROT_WRITE, PMAP_EXPAND_OPTIONS_ALIASMAP);
898 dblmap_max += sz;
899 return ava - cva;
900 }
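/*
 * Usage sketch for a hypothetical caller: to create a non-executable alias
 * of a page-aligned kernel range, one would do something like
 *     vm_offset_t delta = dyn_dblmap(start_va, size);
 * and then access the alias at start_va + delta; dblmap_max advances by
 * 'size', so successive calls pack their aliases contiguously.
 */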
901 /* Adjust offsets interior to the bootstrap interrupt descriptor table to redirect
902 * control to the double-mapped interrupt vectors. The IDTR proper will be
903 * programmed via cpu_desc_load()
904 */
905 void
906 idt64_remap(void)
907 {
908 for (int i = 0; i < IDTSZ; i++) {
909 master_idt64[i].offset64 = DBLMAP(master_idt64[i].offset64);
910 }
911 }