]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/i386_init.c
xnu-4903.241.1.tar.gz
[apple/xnu.git] / osfmk / i386 / i386_init.c
CommitLineData
55e303ae 1/*
39037602 2 * Copyright (c) 2003-2016 Apple Inc. All rights reserved.
55e303ae 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
55e303ae 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
55e303ae
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
55e303ae
A
57
58#include <mach/i386/vm_param.h>
59
60#include <string.h>
61#include <mach/vm_param.h>
62#include <mach/vm_prot.h>
63#include <mach/machine.h>
64#include <mach/time_value.h>
55e303ae
A
65#include <kern/spl.h>
66#include <kern/assert.h>
67#include <kern/debug.h>
68#include <kern/misc_protos.h>
69#include <kern/startup.h>
70#include <kern/clock.h>
0c530ab8 71#include <kern/pms.h>
55e303ae
A
72#include <kern/xpr.h>
73#include <kern/cpu_data.h>
74#include <kern/processor.h>
fe8ab488 75#include <sys/kdebug.h>
0c530ab8 76#include <console/serial_protos.h>
55e303ae
A
77#include <vm/vm_page.h>
78#include <vm/pmap.h>
79#include <vm/vm_kern.h>
6d2010ae 80#include <machine/pal_routines.h>
55e303ae
A
81#include <i386/fpu.h>
82#include <i386/pmap.h>
55e303ae 83#include <i386/misc_protos.h>
b0d623f7 84#include <i386/cpu_threads.h>
55e303ae 85#include <i386/cpuid.h>
b0d623f7 86#include <i386/lapic.h>
55e303ae 87#include <i386/mp.h>
0c530ab8 88#include <i386/mp_desc.h>
6d2010ae 89#if CONFIG_MTRR
b0d623f7 90#include <i386/mtrr.h>
6d2010ae 91#endif
91447636 92#include <i386/machine_routines.h>
b0d623f7 93#if CONFIG_MCA
0c530ab8 94#include <i386/machine_check.h>
b0d623f7 95#endif
6d2010ae 96#include <i386/ucode.h>
91447636 97#include <i386/postcode.h>
0c530ab8
A
98#include <i386/Diagnostics.h>
99#include <i386/pmCPU.h>
100#include <i386/tsc.h>
2d21ac55 101#include <i386/locks.h> /* LcksOpts */
6d2010ae
A
102#if DEBUG
103#include <machine/pal_routines.h>
104#endif
5ba3f43e
A
105
106#if MONOTONIC
107#include <kern/monotonic.h>
108#endif /* MONOTONIC */
109
110#include <san/kasan.h>
111
b0d623f7
A
112#if DEBUG
113#define DBG(x...) kprintf(x)
114#else
115#define DBG(x...)
116#endif
55e303ae 117
b0d623f7
A
118int debug_task;
119
120static boot_args *kernelBootArgs;
121
122extern int disableConsoleOutput;
123extern const char version[];
124extern const char version_variant[];
125extern int nx_enabled;
126
813fb2f6
A
127/*
128 * Set initial values so that ml_phys_* routines can use the booter's ID mapping
129 * to touch physical space before the kernel's physical aperture exists.
130 */
131uint64_t physmap_base = 0;
132uint64_t physmap_max = 4*GB;
b0d623f7 133
316670eb 134pd_entry_t *KPTphys;
b0d623f7 135pd_entry_t *IdlePTD;
316670eb
A
136pdpt_entry_t *IdlePDPT;
137pml4_entry_t *IdlePML4;
b0d623f7 138
b0d623f7 139char *physfree;
5c9f4661 140void idt64_remap(void);
b0d623f7
A
141
142/*
143 * Note: ALLOCPAGES() can only be used safely within Idle_PTs_init()
144 * due to the mutation of physfree.
145 */
146static void *
147ALLOCPAGES(int npages)
148{
149 uintptr_t tmp = (uintptr_t)physfree;
150 bzero(physfree, npages * PAGE_SIZE);
151 physfree += npages * PAGE_SIZE;
b0d623f7 152 tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK;
b0d623f7
A
153 return (void *)tmp;
154}
155
156static void
157fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count)
158{
159 int i;
160 for (i=0; i<count; i++) {
161 base[index] = src | prot | INTEL_PTE_VALID;
162 src += PAGE_SIZE;
163 index++;
164 }
165}
166
6d2010ae 167extern pmap_paddr_t first_avail;
b0d623f7 168
b0d623f7
A
169int break_kprintf = 0;
170
171uint64_t
172x86_64_pre_sleep(void)
173{
174 IdlePML4[0] = IdlePML4[KERNEL_PML4_INDEX];
6d2010ae
A
175 uint64_t oldcr3 = get_cr3_raw();
176 set_cr3_raw((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4));
b0d623f7
A
177 return oldcr3;
178}
179
180void
181x86_64_post_sleep(uint64_t new_cr3)
182{
183 IdlePML4[0] = 0;
6d2010ae 184 set_cr3_raw((uint32_t) new_cr3);
b0d623f7
A
185}
186
b0d623f7 187
b0d623f7 188
55e303ae 189
b0d623f7
A
// Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address.
// NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account
// for the PCI hole (which is less than 4GB but not more).
7ddcb079 193
316670eb
A
194/* Compile-time guard: NPHYSMAP is capped to 256GiB, accounting for
195 * randomisation
196 */
197extern int maxphymapsupported[NPHYSMAP <= (PTE_PER_PAGE/2) ? 1 : -1];
198
b0d623f7
A
199static void
200physmap_init(void)
201{
202 pt_entry_t *physmapL3 = ALLOCPAGES(1);
203 struct {
204 pt_entry_t entries[PTE_PER_PAGE];
205 } * physmapL2 = ALLOCPAGES(NPHYSMAP);
206
316670eb 207 uint64_t i;
fe8ab488 208 uint8_t phys_random_L3 = early_random() & 0xFF;
316670eb
A
209
210 /* We assume NX support. Mark all levels of the PHYSMAP NX
211 * to avoid granting executability via a single bit flip.
212 */
fe8ab488
A
213#if DEVELOPMENT || DEBUG
214 uint32_t reg[4];
215 do_cpuid(0x80000000, reg);
216 if (reg[eax] >= 0x80000001) {
217 do_cpuid(0x80000001, reg);
218 assert(reg[edx] & CPUID_EXTFEATURE_XD);
219 }
220#endif /* DEVELOPMENT || DEBUG */
316670eb
A
221
222 for(i = 0; i < NPHYSMAP; i++) {
223 physmapL3[i + phys_random_L3] =
224 ((uintptr_t)ID_MAP_VTOP(&physmapL2[i]))
b0d623f7 225 | INTEL_PTE_VALID
316670eb 226 | INTEL_PTE_NX
b0d623f7 227 | INTEL_PTE_WRITE;
316670eb
A
228
229 uint64_t j;
230 for(j = 0; j < PTE_PER_PAGE; j++) {
231 physmapL2[i].entries[j] =
232 ((i * PTE_PER_PAGE + j) << PDSHIFT)
b0d623f7
A
233 | INTEL_PTE_PS
234 | INTEL_PTE_VALID
3e170ce0 235 | INTEL_PTE_NX
316670eb 236 | INTEL_PTE_WRITE;
b0d623f7
A
237 }
238 }
239
316670eb
A
240 IdlePML4[KERNEL_PHYSMAP_PML4_INDEX] =
241 ((uintptr_t)ID_MAP_VTOP(physmapL3))
242 | INTEL_PTE_VALID
243 | INTEL_PTE_NX
244 | INTEL_PTE_WRITE;
245
246 physmap_base = KVADDR(KERNEL_PHYSMAP_PML4_INDEX, phys_random_L3, 0, 0);
247 physmap_max = physmap_base + NPHYSMAP * GB;
248 DBG("Physical address map base: 0x%qx\n", physmap_base);
249 DBG("Physical map idlepml4[%d]: 0x%llx\n",
250 KERNEL_PHYSMAP_PML4_INDEX, IdlePML4[KERNEL_PHYSMAP_PML4_INDEX]);
251}
6d2010ae 252
5c9f4661 253void doublemap_init(void);
b0d623f7
A
254
255static void
256Idle_PTs_init(void)
257{
258 /* Allocate the "idle" kernel page tables: */
259 KPTphys = ALLOCPAGES(NKPT); /* level 1 */
260 IdlePTD = ALLOCPAGES(NPGPTD); /* level 2 */
316670eb
A
261 IdlePDPT = ALLOCPAGES(1); /* level 3 */
262 IdlePML4 = ALLOCPAGES(1); /* level 4 */
263
264 // Fill the lowest level with everything up to physfree
265 fillkpt(KPTphys,
266 INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));
267
268 /* IdlePTD */
269 fillkpt(IdlePTD,
270 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);
271
272 // IdlePDPT entries
273 fillkpt(IdlePDPT,
274 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);
275
276 // IdlePML4 single entry for kernel space.
277 fillkpt(IdlePML4 + KERNEL_PML4_INDEX,
278 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePDPT), 0, 1);
279
280 postcode(VSTART_PHYSMAP_INIT);
b0d623f7 281
b0d623f7 282 physmap_init();
5c9f4661
A
283 doublemap_init();
284 idt64_remap();
316670eb
A
285
286 postcode(VSTART_SET_CR3);
287
288 // Switch to the page tables..
289 set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
290
291}
292
5ba3f43e
A
293extern void vstart_trap_handler;
294
295#define BOOT_TRAP_VECTOR(t) \
296 [t] = { \
297 (uintptr_t) &vstart_trap_handler, \
298 KERNEL64_CS, \
299 0, \
300 ACC_P|ACC_PL_K|ACC_INTR_GATE, \
301 0 \
302 },
303
304/* Recursive macro to iterate 0..31 */
305#define L0(x,n) x(n)
306#define L1(x,n) L0(x,n-1) L0(x,n)
307#define L2(x,n) L1(x,n-2) L1(x,n)
308#define L3(x,n) L2(x,n-4) L2(x,n)
309#define L4(x,n) L3(x,n-8) L3(x,n)
310#define L5(x,n) L4(x,n-16) L4(x,n)
311#define FOR_0_TO_31(x) L5(x,31)
312
313/*
314 * Bootstrap IDT. Active only during early startup.
315 * Only the trap vectors are defined since interrupts are masked.
316 * All traps point to a common handler.
317 */
318struct fake_descriptor64 master_boot_idt64[IDTSZ]
319 __attribute__((section("__HIB,__desc")))
320 __attribute__((aligned(PAGE_SIZE))) = {
321 FOR_0_TO_31(BOOT_TRAP_VECTOR)
322};
323
324static void
325vstart_idt_init(void)
326{
327 x86_64_desc_register_t vstart_idt = {
328 sizeof(master_boot_idt64),
329 master_boot_idt64 };
330
331 fix_desc64(master_boot_idt64, 32);
332 lidt((void *)&vstart_idt);
333}
b0d623f7
A
334
/*
 * vstart() is called in the natural mode (64bit for K64, 32 for K32)
 * on a set of bootstrap pagetables which use large, 2MB pages to map
 * all of physical memory in both. See idle_pt.c for details.
 *
 * In K64 this identity mapping is mirrored in the top and bottom 512GB
 * slots of PML4.
 *
 * The bootstrap processor is called with argument boot_args_start pointing
 * to the boot-args block. The kernel's (4K page) page tables are allocated
 * and initialized before switching to these.
 *
 * Non-bootstrap processors are called with argument boot_args_start NULL.
 * These processors switch immediately to the existing kernel page tables.
 */
39037602 350__attribute__((noreturn))
b0d623f7
A
351void
352vstart(vm_offset_t boot_args_start)
353{
354 boolean_t is_boot_cpu = !(boot_args_start == 0);
5ba3f43e 355 int cpu = 0;
316670eb 356 uint32_t lphysfree;
b0d623f7
A
357
358 postcode(VSTART_ENTRY);
359
360 if (is_boot_cpu) {
5ba3f43e
A
361 /*
362 * Set-up temporary trap handlers during page-table set-up.
363 */
364 vstart_idt_init();
365 postcode(VSTART_IDT_INIT);
366
b0d623f7
A
367 /*
368 * Get startup parameters.
369 */
370 kernelBootArgs = (boot_args *)boot_args_start;
371 lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
372 physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1));
3e170ce0
A
373
374#if DEVELOPMENT || DEBUG
6d2010ae 375 pal_serial_init();
b0d623f7
A
376#endif
377 DBG("revision 0x%x\n", kernelBootArgs->Revision);
378 DBG("version 0x%x\n", kernelBootArgs->Version);
379 DBG("command line %s\n", kernelBootArgs->CommandLine);
380 DBG("memory map 0x%x\n", kernelBootArgs->MemoryMap);
381 DBG("memory map sz 0x%x\n", kernelBootArgs->MemoryMapSize);
382 DBG("kaddr 0x%x\n", kernelBootArgs->kaddr);
383 DBG("ksize 0x%x\n", kernelBootArgs->ksize);
384 DBG("physfree %p\n", physfree);
385 DBG("bootargs: %p, &ksize: %p &kaddr: %p\n",
386 kernelBootArgs,
387 &kernelBootArgs->ksize,
388 &kernelBootArgs->kaddr);
3e170ce0
A
389 DBG("SMBIOS mem sz 0x%llx\n", kernelBootArgs->PhysicalMemorySize);
390
7ddcb079
A
391 /*
392 * Setup boot args given the physical start address.
fe8ab488
A
393 * Note: PE_init_platform needs to be called before Idle_PTs_init
394 * because access to the DeviceTree is required to read the
395 * random seed before generating a random physical map slide.
7ddcb079
A
396 */
397 kernelBootArgs = (boot_args *)
398 ml_static_ptovirt(boot_args_start);
399 DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
400 (unsigned long)boot_args_start, kernelBootArgs);
5ba3f43e
A
401
402#if KASAN
403 kasan_reserve_memory(kernelBootArgs);
404#endif
405
7ddcb079
A
406 PE_init_platform(FALSE, kernelBootArgs);
407 postcode(PE_INIT_PLATFORM_D);
fe8ab488
A
408
409 Idle_PTs_init();
410 postcode(VSTART_IDLE_PTS_INIT);
411
5ba3f43e
A
412#if KASAN
413 /* Init kasan and map whatever was stolen from physfree */
414 kasan_init();
415 kasan_notify_stolen((uintptr_t)ml_static_ptovirt((vm_offset_t)physfree));
416#endif
417
418#if MONOTONIC
d9a64523 419 mt_early_init();
5ba3f43e
A
420#endif /* MONOTONIC */
421
fe8ab488
A
422 first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);
423
fe8ab488 424 cpu_data_alloc(TRUE);
5ba3f43e
A
425
426 cpu_desc_init(cpu_datap(0));
427 postcode(VSTART_CPU_DESC_INIT);
428 cpu_desc_load(cpu_datap(0));
429
430 postcode(VSTART_CPU_MODE_INIT);
431 cpu_syscall_init(cpu_datap(0)); /* cpu_syscall_init() will be
432 * invoked on the APs
433 * via i386_init_slave()
434 */
b0d623f7 435 } else {
316670eb
A
436 /* Switch to kernel's page tables (from the Boot PTs) */
437 set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
b0d623f7
A
438 /* Find our logical cpu number */
439 cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
7ddcb079 440 DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
5ba3f43e 441 cpu_desc_load(cpu_datap(cpu));
b0d623f7 442 }
8ad349bb 443
b0d623f7 444 postcode(VSTART_EXIT);
316670eb
A
445 x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
446 : (uintptr_t) i386_init_slave,
447 cpu_datap(cpu)->cpu_int_stack_top);
b0d623f7 448}
21362eb3 449
fe8ab488
A
/*
 * pstate_trace: empty stub. i386_init() calls it at a few points during
 * start-up; in this file it performs no work.
 */
void
pstate_trace(void)
{
	/* intentionally empty */
}
454
55e303ae
A
455/*
456 * Cpu initialization. Running virtual, but without MACH VM
b0d623f7 457 * set up.
55e303ae
A
458 */
459void
7ddcb079 460i386_init(void)
55e303ae
A
461{
462 unsigned int maxmem;
0c530ab8 463 uint64_t maxmemtouse;
b0d623f7 464 unsigned int cpus = 0;
935ed37a 465 boolean_t fidn;
b0d623f7 466 boolean_t IA32e = TRUE;
91447636
A
467
468 postcode(I386_INIT_ENTRY);
55e303ae 469
6d2010ae 470 pal_i386_init();
fe8ab488
A
471 tsc_init();
472 rtclock_early_init(); /* mach_absolute_time() now functionsl */
473
39037602 474 kernel_debug_string_early("i386_init");
fe8ab488 475 pstate_trace();
6d2010ae 476
b0d623f7 477#if CONFIG_MCA
0c530ab8
A
478 /* Initialize machine-check handling */
479 mca_cpu_init();
b0d623f7 480#endif
4452a7af 481
0c530ab8 482 master_cpu = 0;
0c530ab8 483 cpu_init();
b0d623f7 484
0c530ab8
A
485 postcode(CPU_INIT_D);
486
55e303ae
A
487 printf_init(); /* Init this in case we need debugger */
488 panic_init(); /* Init this in case we need debugger */
489
490 /* setup debugging output if one has been chosen */
39037602 491 kernel_debug_string_early("PE_init_kprintf");
55e303ae 492 PE_init_kprintf(FALSE);
55e303ae 493
39037602 494 kernel_debug_string_early("kernel_early_bootstrap");
39236c6e
A
495 kernel_early_bootstrap();
496
593a1d5f 497 if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags)))
0c530ab8
A
498 dgWork.dgFlags = 0;
499
500 serialmode = 0;
5ba3f43e 501 if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
0c530ab8
A
502 /* We want a serial keyboard and/or console */
503 kprintf("Serial mode specified: %08X\n", serialmode);
5ba3f43e
A
504 int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
505 if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
506 if (force_sync) {
507 serialmode |= SERIALMODE_SYNCDRAIN;
508 kprintf(
509 "WARNING: Forcing uart driver to output synchronously."
510 "printf()s/IOLogs will impact kernel performance.\n"
511 "You are advised to avoid using 'drain_uart_sync' boot-arg.\n");
512 }
513 }
0c530ab8 514 }
5ba3f43e 515 if (serialmode & SERIALMODE_OUTPUT) {
0c530ab8 516 (void)switch_to_serial_console();
5ba3f43e 517 disableConsoleOutput = FALSE; /* Allow printfs to happen */
0c530ab8
A
518 }
519
55e303ae 520 /* setup console output */
39037602 521 kernel_debug_string_early("PE_init_printf");
55e303ae
A
522 PE_init_printf(FALSE);
523
524 kprintf("version_variant = %s\n", version_variant);
525 kprintf("version = %s\n", version);
2d21ac55 526
593a1d5f
A
527 if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
528 maxmemtouse = 0;
55e303ae 529 else
b0d623f7 530 maxmemtouse = ((uint64_t)maxmem) * MB;
55e303ae 531
593a1d5f 532 if (PE_parse_boot_argn("cpus", &cpus, sizeof (cpus))) {
91447636
A
533 if ((0 < cpus) && (cpus < max_ncpus))
534 max_ncpus = cpus;
535 }
55e303ae 536
0c530ab8
A
537 /*
538 * debug support for > 4G systems
539 */
fe8ab488
A
540 PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof (vm_himemory_mode));
541 if (vm_himemory_mode != 0)
542 kprintf("himemory_mode: %d\n", vm_himemory_mode);
0c530ab8 543
935ed37a 544 if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn)))
2d21ac55 545 force_immediate_debugger_NMI = FALSE;
935ed37a
A
546 else
547 force_immediate_debugger_NMI = fidn;
6d2010ae
A
548
549#if DEBUG
550 nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS, &urgency_notification_assert_abstime_threshold);
551#endif
552 PE_parse_boot_argn("urgency_notification_abstime",
553 &urgency_notification_assert_abstime_threshold,
554 sizeof(urgency_notification_assert_abstime_threshold));
555
0c530ab8
A
556 if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD))
557 nx_enabled = 0;
558
2d21ac55
A
559 /*
560 * VM initialization, after this we're using page tables...
fe8ab488 561 * Thn maximum number of cpus must be set beforehand.
2d21ac55 562 */
39037602 563 kernel_debug_string_early("i386_vm_init");
0c530ab8
A
564 i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);
565
6d2010ae
A
566 /* create the console for verbose or pretty mode */
567 /* Note: doing this prior to tsc_init() allows for graceful panic! */
568 PE_init_platform(TRUE, kernelBootArgs);
569 PE_create_console();
0c530ab8 570
39037602 571 kernel_debug_string_early("power_management_init");
0b4c1975 572 power_management_init();
0c530ab8
A
573 processor_bootstrap();
574 thread_bootstrap();
575
fe8ab488 576 pstate_trace();
39037602 577 kernel_debug_string_early("machine_startup");
55e303ae 578 machine_startup();
fe8ab488 579 pstate_trace();
55e303ae 580}
b0d623f7
A
581
582static void
583do_init_slave(boolean_t fast_restart)
584{
585 void *init_param = FULL_SLAVE_INIT;
586
587 postcode(I386_INIT_SLAVE);
588
589 if (!fast_restart) {
590 /* Ensure that caching and write-through are enabled */
591 set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));
592
593 DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
594 get_cpu_number(), get_cpu_phys_number());
595
596 assert(!ml_get_interrupts_enabled());
597
5ba3f43e 598 cpu_syscall_init(current_cpu_datap());
316670eb 599 pmap_cpu_init();
b0d623f7
A
600
601#if CONFIG_MCA
602 mca_cpu_init();
603#endif
604
bd504ef0 605 LAPIC_INIT();
b0d623f7
A
606 lapic_configure();
607 LAPIC_DUMP();
608 LAPIC_CPU_MAP_DUMP();
609
610 init_fpu();
611
6d2010ae 612#if CONFIG_MTRR
b0d623f7 613 mtrr_update_cpu();
6d2010ae 614#endif
bd504ef0
A
615 /* update CPU microcode */
616 ucode_update_wake();
b0d623f7
A
617 } else
618 init_param = FAST_SLAVE_INIT;
619
620#if CONFIG_VMX
621 /* resume VT operation */
490019cf 622 vmx_resume(FALSE);
b0d623f7
A
623#endif
624
6d2010ae 625#if CONFIG_MTRR
b0d623f7
A
626 if (!fast_restart)
627 pat_init();
6d2010ae 628#endif
b0d623f7
A
629
630 cpu_thread_init(); /* not strictly necessary */
631
39236c6e 632 cpu_init(); /* Sets cpu_running which starter cpu waits for */
b0d623f7
A
633 slave_main(init_param);
634
635 panic("do_init_slave() returned from slave_main()");
636}
637
638/*
639 * i386_init_slave() is called from pstart.
640 * We're in the cpu's interrupt stack with interrupts disabled.
641 * At this point we are in legacy mode. We need to switch on IA32e
642 * if the mode is set to 64-bits.
643 */
644void
645i386_init_slave(void)
646{
647 do_init_slave(FALSE);
648}
649
650/*
651 * i386_init_slave_fast() is called from pmCPUHalt.
652 * We're running on the idle thread and need to fix up
653 * some accounting and get it so that the scheduler sees this
654 * CPU again.
655 */
656void
657i386_init_slave_fast(void)
658{
659 do_init_slave(TRUE);
660}
661
5c9f4661
A
662#include <libkern/kernel_mach_header.h>
663
664/* TODO: Evaluate global PTEs for the double-mapped translations */
665
666uint64_t dblmap_base, dblmap_max;
667kernel_segment_command_t *hdescseg;
b0d623f7 668
5c9f4661
A
669pt_entry_t *dblmapL3;
670unsigned int dblallocs;
671uint64_t dblmap_dist;
672extern uint64_t idt64_hndl_table0[];
673
674
675void doublemap_init(void) {
676 dblmapL3 = ALLOCPAGES(1); // for 512 1GiB entries
677 dblallocs++;
678
679 struct {
680 pt_entry_t entries[PTE_PER_PAGE];
681 } * dblmapL2 = ALLOCPAGES(1); // for 512 2MiB entries
682 dblallocs++;
683
684 dblmapL3[0] = ((uintptr_t)ID_MAP_VTOP(&dblmapL2[0]))
685 | INTEL_PTE_VALID
686 | INTEL_PTE_WRITE;
687
688 hdescseg = getsegbynamefromheader(&_mh_execute_header, "__HIB");
689
690 vm_offset_t hdescb = hdescseg->vmaddr;
691 unsigned long hdescsz = hdescseg->vmsize;
692 unsigned long hdescszr = round_page_64(hdescsz);
693 vm_offset_t hdescc = hdescb, hdesce = hdescb + hdescszr;
694
695 kernel_section_t *thdescsect = getsectbynamefromheader(&_mh_execute_header, "__HIB", "__text");
696 vm_offset_t thdescb = thdescsect->addr;
697 unsigned long thdescsz = thdescsect->size;
698 unsigned long thdescszr = round_page_64(thdescsz);
699 vm_offset_t thdesce = thdescb + thdescszr;
700
701 assert((hdescb & 0xFFF) == 0);
702 /* Mirror HIB translations into the double-mapped pagetable subtree*/
703 for(int i = 0; hdescc < hdesce; i++) {
704 struct {
705 pt_entry_t entries[PTE_PER_PAGE];
706 } * dblmapL1 = ALLOCPAGES(1);
707 dblallocs++;
708 dblmapL2[0].entries[i] = ((uintptr_t)ID_MAP_VTOP(&dblmapL1[0])) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
709 int hdescn = (int) ((hdesce - hdescc) / PAGE_SIZE);
710 for (int j = 0; j < MIN(PTE_PER_PAGE, hdescn); j++) {
711 uint64_t template = INTEL_PTE_VALID;
712 if ((hdescc >= thdescb) && (hdescc < thdesce)) {
713 /* executable */
714 } else {
715 template |= INTEL_PTE_WRITE | INTEL_PTE_NX ; /* Writeable, NX */
716 }
717 dblmapL1[0].entries[j] = ((uintptr_t)ID_MAP_VTOP(hdescc)) | template;
718 hdescc += PAGE_SIZE;
719 }
720 }
721
722 IdlePML4[KERNEL_DBLMAP_PML4_INDEX] = ((uintptr_t)ID_MAP_VTOP(dblmapL3)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
723
724 dblmap_base = KVADDR(KERNEL_DBLMAP_PML4_INDEX, dblmapL3, 0, 0);
725 dblmap_max = dblmap_base + hdescszr;
726 /* Calculate the double-map distance, which accounts for the current
727 * KASLR slide
728 */
729
730 dblmap_dist = dblmap_base - hdescb;
731 idt64_hndl_table0[1] = DBLMAP(idt64_hndl_table0[1]);
d9a64523 732 idt64_hndl_table0[6] = (uint64_t)(uintptr_t)&kernel_stack_mask;
5c9f4661
A
733
734 extern cpu_data_t cpshadows[], scdatas[];
735 uintptr_t cd1 = (uintptr_t) &cpshadows[0];
736 uintptr_t cd2 = (uintptr_t) &scdatas[0];
737/* Record the displacement from the kernel's per-CPU data pointer, eventually
738 * programmed into GSBASE, to the "shadows" in the doublemapped
739 * region. These are not aliases, but separate physical allocations
740 * containing data required in the doublemapped trampolines.
741*/
742 idt64_hndl_table0[2] = dblmap_dist + cd1 - cd2;
743
744 DBG("Double map base: 0x%qx\n", dblmap_base);
745 DBG("double map idlepml4[%d]: 0x%llx\n", KERNEL_DBLMAP_PML4_INDEX, IdlePML4[KERNEL_DBLMAP_PML4_INDEX]);
746 assert(LDTSZ > LDTSZ_MIN);
747}
748
749vm_offset_t dyn_dblmap(vm_offset_t, vm_offset_t);
750
751#include <i386/pmap_internal.h>
752
753/* Use of this routine is expected to be synchronized by callers
754 * Creates non-executable aliases.
755 */
756vm_offset_t dyn_dblmap(vm_offset_t cva, vm_offset_t sz) {
757 vm_offset_t ava = dblmap_max;
758
759 assert((sz & PAGE_MASK) == 0);
760 assert(cva != 0);
761
762 pmap_alias(ava, cva, cva + sz, VM_PROT_READ | VM_PROT_WRITE, PMAP_EXPAND_OPTIONS_ALIASMAP);
763 dblmap_max += sz;
764 return (ava - cva);
765}
766/* Adjust offsets interior to the bootstrap interrupt descriptor table to redirect
767 * control to the double-mapped interrupt vectors. The IDTR proper will be
768 * programmed via cpu_desc_load()
769 */
770void idt64_remap(void) {
771 for (int i = 0; i < IDTSZ; i++) {
772 master_idt64[i].offset64 = DBLMAP(master_idt64[i].offset64);
773 }
774}