]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/i386_init.c
xnu-4570.71.2.tar.gz
[apple/xnu.git] / osfmk / i386 / i386_init.c
1 /*
2 * Copyright (c) 2003-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57
58 #include <mach/i386/vm_param.h>
59
60 #include <string.h>
61 #include <mach/vm_param.h>
62 #include <mach/vm_prot.h>
63 #include <mach/machine.h>
64 #include <mach/time_value.h>
65 #include <kern/spl.h>
66 #include <kern/assert.h>
67 #include <kern/debug.h>
68 #include <kern/misc_protos.h>
69 #include <kern/startup.h>
70 #include <kern/clock.h>
71 #include <kern/pms.h>
72 #include <kern/xpr.h>
73 #include <kern/cpu_data.h>
74 #include <kern/processor.h>
75 #include <sys/kdebug.h>
76 #include <console/serial_protos.h>
77 #include <vm/vm_page.h>
78 #include <vm/pmap.h>
79 #include <vm/vm_kern.h>
80 #include <machine/pal_routines.h>
81 #include <i386/fpu.h>
82 #include <i386/pmap.h>
83 #include <i386/misc_protos.h>
84 #include <i386/cpu_threads.h>
85 #include <i386/cpuid.h>
86 #include <i386/lapic.h>
87 #include <i386/mp.h>
88 #include <i386/mp_desc.h>
89 #if CONFIG_MTRR
90 #include <i386/mtrr.h>
91 #endif
92 #include <i386/machine_routines.h>
93 #if CONFIG_MCA
94 #include <i386/machine_check.h>
95 #endif
96 #include <i386/ucode.h>
97 #include <i386/postcode.h>
98 #include <i386/Diagnostics.h>
99 #include <i386/pmCPU.h>
100 #include <i386/tsc.h>
101 #include <i386/locks.h> /* LcksOpts */
102 #if DEBUG
103 #include <machine/pal_routines.h>
104 #endif
105
106 #if MONOTONIC
107 #include <kern/monotonic.h>
108 #endif /* MONOTONIC */
109
110 #include <san/kasan.h>
111
112 #if DEBUG
113 #define DBG(x...) kprintf(x)
114 #else
115 #define DBG(x...)
116 #endif
117
118 int debug_task;
119
120 static boot_args *kernelBootArgs;
121
122 extern int disableConsoleOutput;
123 extern const char version[];
124 extern const char version_variant[];
125 extern int nx_enabled;
126
127 /*
128 * Set initial values so that ml_phys_* routines can use the booter's ID mapping
129 * to touch physical space before the kernel's physical aperture exists.
130 */
131 uint64_t physmap_base = 0;
132 uint64_t physmap_max = 4*GB;
133
134 pd_entry_t *KPTphys;
135 pd_entry_t *IdlePTD;
136 pdpt_entry_t *IdlePDPT;
137 pml4_entry_t *IdlePML4;
138
139 char *physfree;
140 void idt64_remap(void);
141
142 /*
143 * Note: ALLOCPAGES() can only be used safely within Idle_PTs_init()
144 * due to the mutation of physfree.
145 */
146 static void *
147 ALLOCPAGES(int npages)
148 {
149 uintptr_t tmp = (uintptr_t)physfree;
150 bzero(physfree, npages * PAGE_SIZE);
151 physfree += npages * PAGE_SIZE;
152 tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK;
153 return (void *)tmp;
154 }
155
156 static void
157 fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count)
158 {
159 int i;
160 for (i=0; i<count; i++) {
161 base[index] = src | prot | INTEL_PTE_VALID;
162 src += PAGE_SIZE;
163 index++;
164 }
165 }
166
167 extern pmap_paddr_t first_avail;
168
169 int break_kprintf = 0;
170
171 uint64_t
172 x86_64_pre_sleep(void)
173 {
174 IdlePML4[0] = IdlePML4[KERNEL_PML4_INDEX];
175 uint64_t oldcr3 = get_cr3_raw();
176 set_cr3_raw((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4));
177 return oldcr3;
178 }
179
180 void
181 x86_64_post_sleep(uint64_t new_cr3)
182 {
183 IdlePML4[0] = 0;
184 set_cr3_raw((uint32_t) new_cr3);
185 }
186
187
188
189
190 // Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address
191 // NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account
192 // the PCI hole (which is less 4GB but not more).
193
194 /* Compile-time guard: NPHYSMAP is capped to 256GiB, accounting for
195 * randomisation
196 */
197 extern int maxphymapsupported[NPHYSMAP <= (PTE_PER_PAGE/2) ? 1 : -1];
198
199 static void
200 physmap_init(void)
201 {
202 pt_entry_t *physmapL3 = ALLOCPAGES(1);
203 struct {
204 pt_entry_t entries[PTE_PER_PAGE];
205 } * physmapL2 = ALLOCPAGES(NPHYSMAP);
206
207 uint64_t i;
208 uint8_t phys_random_L3 = early_random() & 0xFF;
209
210 /* We assume NX support. Mark all levels of the PHYSMAP NX
211 * to avoid granting executability via a single bit flip.
212 */
213 #if DEVELOPMENT || DEBUG
214 uint32_t reg[4];
215 do_cpuid(0x80000000, reg);
216 if (reg[eax] >= 0x80000001) {
217 do_cpuid(0x80000001, reg);
218 assert(reg[edx] & CPUID_EXTFEATURE_XD);
219 }
220 #endif /* DEVELOPMENT || DEBUG */
221
222 for(i = 0; i < NPHYSMAP; i++) {
223 physmapL3[i + phys_random_L3] =
224 ((uintptr_t)ID_MAP_VTOP(&physmapL2[i]))
225 | INTEL_PTE_VALID
226 | INTEL_PTE_NX
227 | INTEL_PTE_WRITE;
228
229 uint64_t j;
230 for(j = 0; j < PTE_PER_PAGE; j++) {
231 physmapL2[i].entries[j] =
232 ((i * PTE_PER_PAGE + j) << PDSHIFT)
233 | INTEL_PTE_PS
234 | INTEL_PTE_VALID
235 | INTEL_PTE_NX
236 | INTEL_PTE_WRITE;
237 }
238 }
239
240 IdlePML4[KERNEL_PHYSMAP_PML4_INDEX] =
241 ((uintptr_t)ID_MAP_VTOP(physmapL3))
242 | INTEL_PTE_VALID
243 | INTEL_PTE_NX
244 | INTEL_PTE_WRITE;
245
246 physmap_base = KVADDR(KERNEL_PHYSMAP_PML4_INDEX, phys_random_L3, 0, 0);
247 physmap_max = physmap_base + NPHYSMAP * GB;
248 DBG("Physical address map base: 0x%qx\n", physmap_base);
249 DBG("Physical map idlepml4[%d]: 0x%llx\n",
250 KERNEL_PHYSMAP_PML4_INDEX, IdlePML4[KERNEL_PHYSMAP_PML4_INDEX]);
251 }
252
253 void doublemap_init(void);
254
255 static void
256 Idle_PTs_init(void)
257 {
258 /* Allocate the "idle" kernel page tables: */
259 KPTphys = ALLOCPAGES(NKPT); /* level 1 */
260 IdlePTD = ALLOCPAGES(NPGPTD); /* level 2 */
261 IdlePDPT = ALLOCPAGES(1); /* level 3 */
262 IdlePML4 = ALLOCPAGES(1); /* level 4 */
263
264 // Fill the lowest level with everything up to physfree
265 fillkpt(KPTphys,
266 INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));
267
268 /* IdlePTD */
269 fillkpt(IdlePTD,
270 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);
271
272 // IdlePDPT entries
273 fillkpt(IdlePDPT,
274 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);
275
276 // IdlePML4 single entry for kernel space.
277 fillkpt(IdlePML4 + KERNEL_PML4_INDEX,
278 INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePDPT), 0, 1);
279
280 postcode(VSTART_PHYSMAP_INIT);
281
282 physmap_init();
283 doublemap_init();
284 idt64_remap();
285
286 postcode(VSTART_SET_CR3);
287
288 // Switch to the page tables..
289 set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
290
291 }
292
293 extern void vstart_trap_handler;
294
295 #define BOOT_TRAP_VECTOR(t) \
296 [t] = { \
297 (uintptr_t) &vstart_trap_handler, \
298 KERNEL64_CS, \
299 0, \
300 ACC_P|ACC_PL_K|ACC_INTR_GATE, \
301 0 \
302 },
303
304 /* Recursive macro to iterate 0..31 */
305 #define L0(x,n) x(n)
306 #define L1(x,n) L0(x,n-1) L0(x,n)
307 #define L2(x,n) L1(x,n-2) L1(x,n)
308 #define L3(x,n) L2(x,n-4) L2(x,n)
309 #define L4(x,n) L3(x,n-8) L3(x,n)
310 #define L5(x,n) L4(x,n-16) L4(x,n)
311 #define FOR_0_TO_31(x) L5(x,31)
312
313 /*
314 * Bootstrap IDT. Active only during early startup.
315 * Only the trap vectors are defined since interrupts are masked.
316 * All traps point to a common handler.
317 */
318 struct fake_descriptor64 master_boot_idt64[IDTSZ]
319 __attribute__((section("__HIB,__desc")))
320 __attribute__((aligned(PAGE_SIZE))) = {
321 FOR_0_TO_31(BOOT_TRAP_VECTOR)
322 };
323
324 static void
325 vstart_idt_init(void)
326 {
327 x86_64_desc_register_t vstart_idt = {
328 sizeof(master_boot_idt64),
329 master_boot_idt64 };
330
331 fix_desc64(master_boot_idt64, 32);
332 lidt((void *)&vstart_idt);
333 }
334
335 /*
336 * vstart() is called in the natural mode (64bit for K64, 32 for K32)
337 * on a set of bootstrap pagetables which use large, 2MB pages to map
338 * all of physical memory in both. See idle_pt.c for details.
339 *
340 * In K64 this identity mapping is mirrored the top and bottom 512GB
341 * slots of PML4.
342 *
343 * The bootstrap processor called with argument boot_args_start pointing to
344 * the boot-args block. The kernel's (4K page) page tables are allocated and
345 * initialized before switching to these.
346 *
347 * Non-bootstrap processors are called with argument boot_args_start NULL.
348 * These processors switch immediately to the existing kernel page tables.
349 */
350 __attribute__((noreturn))
351 void
352 vstart(vm_offset_t boot_args_start)
353 {
354 boolean_t is_boot_cpu = !(boot_args_start == 0);
355 int cpu = 0;
356 uint32_t lphysfree;
357
358 postcode(VSTART_ENTRY);
359
360 if (is_boot_cpu) {
361 /*
362 * Set-up temporary trap handlers during page-table set-up.
363 */
364 vstart_idt_init();
365 postcode(VSTART_IDT_INIT);
366
367 /*
368 * Get startup parameters.
369 */
370 kernelBootArgs = (boot_args *)boot_args_start;
371 lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
372 physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1));
373
374 #if DEVELOPMENT || DEBUG
375 pal_serial_init();
376 #endif
377 DBG("revision 0x%x\n", kernelBootArgs->Revision);
378 DBG("version 0x%x\n", kernelBootArgs->Version);
379 DBG("command line %s\n", kernelBootArgs->CommandLine);
380 DBG("memory map 0x%x\n", kernelBootArgs->MemoryMap);
381 DBG("memory map sz 0x%x\n", kernelBootArgs->MemoryMapSize);
382 DBG("kaddr 0x%x\n", kernelBootArgs->kaddr);
383 DBG("ksize 0x%x\n", kernelBootArgs->ksize);
384 DBG("physfree %p\n", physfree);
385 DBG("bootargs: %p, &ksize: %p &kaddr: %p\n",
386 kernelBootArgs,
387 &kernelBootArgs->ksize,
388 &kernelBootArgs->kaddr);
389 DBG("SMBIOS mem sz 0x%llx\n", kernelBootArgs->PhysicalMemorySize);
390
391 /*
392 * Setup boot args given the physical start address.
393 * Note: PE_init_platform needs to be called before Idle_PTs_init
394 * because access to the DeviceTree is required to read the
395 * random seed before generating a random physical map slide.
396 */
397 kernelBootArgs = (boot_args *)
398 ml_static_ptovirt(boot_args_start);
399 DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
400 (unsigned long)boot_args_start, kernelBootArgs);
401
402 #if KASAN
403 kasan_reserve_memory(kernelBootArgs);
404 #endif
405
406 PE_init_platform(FALSE, kernelBootArgs);
407 postcode(PE_INIT_PLATFORM_D);
408
409 Idle_PTs_init();
410 postcode(VSTART_IDLE_PTS_INIT);
411
412 #if KASAN
413 /* Init kasan and map whatever was stolen from physfree */
414 kasan_init();
415 kasan_notify_stolen((uintptr_t)ml_static_ptovirt((vm_offset_t)physfree));
416 #endif
417
418 #if MONOTONIC
419 mt_init();
420 #endif /* MONOTONIC */
421
422 first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);
423
424 cpu_data_alloc(TRUE);
425
426 cpu_desc_init(cpu_datap(0));
427 postcode(VSTART_CPU_DESC_INIT);
428 cpu_desc_load(cpu_datap(0));
429
430 postcode(VSTART_CPU_MODE_INIT);
431 cpu_syscall_init(cpu_datap(0)); /* cpu_syscall_init() will be
432 * invoked on the APs
433 * via i386_init_slave()
434 */
435 } else {
436 /* Switch to kernel's page tables (from the Boot PTs) */
437 set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
438 /* Find our logical cpu number */
439 cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
440 DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
441 cpu_desc_load(cpu_datap(cpu));
442 }
443
444 postcode(VSTART_EXIT);
445 x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
446 : (uintptr_t) i386_init_slave,
447 cpu_datap(cpu)->cpu_int_stack_top);
448 }
449
/* Intentionally empty; i386_init() calls it at a few points during boot. */
void
pstate_trace(void)
{
	/* nothing to do */
}
454
455 /*
456 * Cpu initialization. Running virtual, but without MACH VM
457 * set up.
458 */
459 void
460 i386_init(void)
461 {
462 unsigned int maxmem;
463 uint64_t maxmemtouse;
464 unsigned int cpus = 0;
465 boolean_t fidn;
466 boolean_t IA32e = TRUE;
467
468 postcode(I386_INIT_ENTRY);
469
470 pal_i386_init();
471 tsc_init();
472 rtclock_early_init(); /* mach_absolute_time() now functionsl */
473
474 kernel_debug_string_early("i386_init");
475 pstate_trace();
476
477 #if CONFIG_MCA
478 /* Initialize machine-check handling */
479 mca_cpu_init();
480 #endif
481
482 master_cpu = 0;
483 cpu_init();
484
485 postcode(CPU_INIT_D);
486
487 printf_init(); /* Init this in case we need debugger */
488 panic_init(); /* Init this in case we need debugger */
489
490 /* setup debugging output if one has been chosen */
491 kernel_debug_string_early("PE_init_kprintf");
492 PE_init_kprintf(FALSE);
493
494 kernel_debug_string_early("kernel_early_bootstrap");
495 kernel_early_bootstrap();
496
497 if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags)))
498 dgWork.dgFlags = 0;
499
500 serialmode = 0;
501 if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
502 /* We want a serial keyboard and/or console */
503 kprintf("Serial mode specified: %08X\n", serialmode);
504 int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
505 if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
506 if (force_sync) {
507 serialmode |= SERIALMODE_SYNCDRAIN;
508 kprintf(
509 "WARNING: Forcing uart driver to output synchronously."
510 "printf()s/IOLogs will impact kernel performance.\n"
511 "You are advised to avoid using 'drain_uart_sync' boot-arg.\n");
512 }
513 }
514 }
515 if (serialmode & SERIALMODE_OUTPUT) {
516 (void)switch_to_serial_console();
517 disableConsoleOutput = FALSE; /* Allow printfs to happen */
518 }
519
520 /* setup console output */
521 kernel_debug_string_early("PE_init_printf");
522 PE_init_printf(FALSE);
523
524 kprintf("version_variant = %s\n", version_variant);
525 kprintf("version = %s\n", version);
526
527 if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
528 maxmemtouse = 0;
529 else
530 maxmemtouse = ((uint64_t)maxmem) * MB;
531
532 if (PE_parse_boot_argn("cpus", &cpus, sizeof (cpus))) {
533 if ((0 < cpus) && (cpus < max_ncpus))
534 max_ncpus = cpus;
535 }
536
537 /*
538 * debug support for > 4G systems
539 */
540 PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof (vm_himemory_mode));
541 if (vm_himemory_mode != 0)
542 kprintf("himemory_mode: %d\n", vm_himemory_mode);
543
544 if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn)))
545 force_immediate_debugger_NMI = FALSE;
546 else
547 force_immediate_debugger_NMI = fidn;
548
549 #if DEBUG
550 nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS, &urgency_notification_assert_abstime_threshold);
551 #endif
552 PE_parse_boot_argn("urgency_notification_abstime",
553 &urgency_notification_assert_abstime_threshold,
554 sizeof(urgency_notification_assert_abstime_threshold));
555
556 if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD))
557 nx_enabled = 0;
558
559 /*
560 * VM initialization, after this we're using page tables...
561 * Thn maximum number of cpus must be set beforehand.
562 */
563 kernel_debug_string_early("i386_vm_init");
564 i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);
565
566 /* create the console for verbose or pretty mode */
567 /* Note: doing this prior to tsc_init() allows for graceful panic! */
568 PE_init_platform(TRUE, kernelBootArgs);
569 PE_create_console();
570
571 kernel_debug_string_early("power_management_init");
572 power_management_init();
573 processor_bootstrap();
574 thread_bootstrap();
575
576 pstate_trace();
577 kernel_debug_string_early("machine_startup");
578 machine_startup();
579 pstate_trace();
580 }
581
582 static void
583 do_init_slave(boolean_t fast_restart)
584 {
585 void *init_param = FULL_SLAVE_INIT;
586
587 postcode(I386_INIT_SLAVE);
588
589 if (!fast_restart) {
590 /* Ensure that caching and write-through are enabled */
591 set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));
592
593 DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
594 get_cpu_number(), get_cpu_phys_number());
595
596 assert(!ml_get_interrupts_enabled());
597
598 cpu_syscall_init(current_cpu_datap());
599 pmap_cpu_init();
600
601 #if CONFIG_MCA
602 mca_cpu_init();
603 #endif
604
605 LAPIC_INIT();
606 lapic_configure();
607 LAPIC_DUMP();
608 LAPIC_CPU_MAP_DUMP();
609
610 init_fpu();
611
612 #if CONFIG_MTRR
613 mtrr_update_cpu();
614 #endif
615 /* update CPU microcode */
616 ucode_update_wake();
617 } else
618 init_param = FAST_SLAVE_INIT;
619
620 #if CONFIG_VMX
621 /* resume VT operation */
622 vmx_resume(FALSE);
623 #endif
624
625 #if CONFIG_MTRR
626 if (!fast_restart)
627 pat_init();
628 #endif
629
630 cpu_thread_init(); /* not strictly necessary */
631
632 cpu_init(); /* Sets cpu_running which starter cpu waits for */
633 slave_main(init_param);
634
635 panic("do_init_slave() returned from slave_main()");
636 }
637
638 /*
639 * i386_init_slave() is called from pstart.
640 * We're in the cpu's interrupt stack with interrupts disabled.
641 * At this point we are in legacy mode. We need to switch on IA32e
642 * if the mode is set to 64-bits.
643 */
644 void
645 i386_init_slave(void)
646 {
647 do_init_slave(FALSE);
648 }
649
650 /*
651 * i386_init_slave_fast() is called from pmCPUHalt.
652 * We're running on the idle thread and need to fix up
653 * some accounting and get it so that the scheduler sees this
654 * CPU again.
655 */
656 void
657 i386_init_slave_fast(void)
658 {
659 do_init_slave(TRUE);
660 }
661
662 #include <libkern/kernel_mach_header.h>
663
664 /* TODO: Evaluate global PTEs for the double-mapped translations */
665
666 uint64_t dblmap_base, dblmap_max;
667 kernel_segment_command_t *hdescseg;
668
669 pt_entry_t *dblmapL3;
670 unsigned int dblallocs;
671 uint64_t dblmap_dist;
672 extern uint64_t idt64_hndl_table0[];
673
674
675 void doublemap_init(void) {
676 dblmapL3 = ALLOCPAGES(1); // for 512 1GiB entries
677 dblallocs++;
678
679 struct {
680 pt_entry_t entries[PTE_PER_PAGE];
681 } * dblmapL2 = ALLOCPAGES(1); // for 512 2MiB entries
682 dblallocs++;
683
684 dblmapL3[0] = ((uintptr_t)ID_MAP_VTOP(&dblmapL2[0]))
685 | INTEL_PTE_VALID
686 | INTEL_PTE_WRITE;
687
688 hdescseg = getsegbynamefromheader(&_mh_execute_header, "__HIB");
689
690 vm_offset_t hdescb = hdescseg->vmaddr;
691 unsigned long hdescsz = hdescseg->vmsize;
692 unsigned long hdescszr = round_page_64(hdescsz);
693 vm_offset_t hdescc = hdescb, hdesce = hdescb + hdescszr;
694
695 kernel_section_t *thdescsect = getsectbynamefromheader(&_mh_execute_header, "__HIB", "__text");
696 vm_offset_t thdescb = thdescsect->addr;
697 unsigned long thdescsz = thdescsect->size;
698 unsigned long thdescszr = round_page_64(thdescsz);
699 vm_offset_t thdesce = thdescb + thdescszr;
700
701 assert((hdescb & 0xFFF) == 0);
702 /* Mirror HIB translations into the double-mapped pagetable subtree*/
703 for(int i = 0; hdescc < hdesce; i++) {
704 struct {
705 pt_entry_t entries[PTE_PER_PAGE];
706 } * dblmapL1 = ALLOCPAGES(1);
707 dblallocs++;
708 dblmapL2[0].entries[i] = ((uintptr_t)ID_MAP_VTOP(&dblmapL1[0])) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
709 int hdescn = (int) ((hdesce - hdescc) / PAGE_SIZE);
710 for (int j = 0; j < MIN(PTE_PER_PAGE, hdescn); j++) {
711 uint64_t template = INTEL_PTE_VALID;
712 if ((hdescc >= thdescb) && (hdescc < thdesce)) {
713 /* executable */
714 } else {
715 template |= INTEL_PTE_WRITE | INTEL_PTE_NX ; /* Writeable, NX */
716 }
717 dblmapL1[0].entries[j] = ((uintptr_t)ID_MAP_VTOP(hdescc)) | template;
718 hdescc += PAGE_SIZE;
719 }
720 }
721
722 IdlePML4[KERNEL_DBLMAP_PML4_INDEX] = ((uintptr_t)ID_MAP_VTOP(dblmapL3)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
723
724 dblmap_base = KVADDR(KERNEL_DBLMAP_PML4_INDEX, dblmapL3, 0, 0);
725 dblmap_max = dblmap_base + hdescszr;
726 /* Calculate the double-map distance, which accounts for the current
727 * KASLR slide
728 */
729
730 dblmap_dist = dblmap_base - hdescb;
731 idt64_hndl_table0[1] = DBLMAP(idt64_hndl_table0[1]);
732
733 extern cpu_data_t cpshadows[], scdatas[];
734 uintptr_t cd1 = (uintptr_t) &cpshadows[0];
735 uintptr_t cd2 = (uintptr_t) &scdatas[0];
736 /* Record the displacement from the kernel's per-CPU data pointer, eventually
737 * programmed into GSBASE, to the "shadows" in the doublemapped
738 * region. These are not aliases, but separate physical allocations
739 * containing data required in the doublemapped trampolines.
740 */
741 idt64_hndl_table0[2] = dblmap_dist + cd1 - cd2;
742
743 DBG("Double map base: 0x%qx\n", dblmap_base);
744 DBG("double map idlepml4[%d]: 0x%llx\n", KERNEL_DBLMAP_PML4_INDEX, IdlePML4[KERNEL_DBLMAP_PML4_INDEX]);
745 assert(LDTSZ > LDTSZ_MIN);
746 }
747
748 vm_offset_t dyn_dblmap(vm_offset_t, vm_offset_t);
749
750 #include <i386/pmap_internal.h>
751
752 /* Use of this routine is expected to be synchronized by callers
753 * Creates non-executable aliases.
754 */
755 vm_offset_t dyn_dblmap(vm_offset_t cva, vm_offset_t sz) {
756 vm_offset_t ava = dblmap_max;
757
758 assert((sz & PAGE_MASK) == 0);
759 assert(cva != 0);
760
761 pmap_alias(ava, cva, cva + sz, VM_PROT_READ | VM_PROT_WRITE, PMAP_EXPAND_OPTIONS_ALIASMAP);
762 dblmap_max += sz;
763 return (ava - cva);
764 }
765 /* Adjust offsets interior to the bootstrap interrupt descriptor table to redirect
766 * control to the double-mapped interrupt vectors. The IDTR proper will be
767 * programmed via cpu_desc_load()
768 */
769 void idt64_remap(void) {
770 for (int i = 0; i < IDTSZ; i++) {
771 master_idt64[i].offset64 = DBLMAP(master_idt64[i].offset64);
772 }
773 }