/* (removed extraction residue from a blame/table viewer) */
1 | /* | |
2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | /* | |
57 | * NOTICE: This file was modified by McAfee Research in 2004 to introduce | |
58 | * support for mandatory and extensible security protections. This notice | |
59 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
60 | * Version 2.0. | |
61 | */ | |
62 | /* | |
63 | */ | |
64 | ||
65 | /* | |
66 | * Mach kernel startup. | |
67 | */ | |
68 | ||
69 | #include <debug.h> | |
70 | #include <mach_kdp.h> | |
71 | ||
72 | #include <mach/boolean.h> | |
73 | #include <mach/machine.h> | |
74 | #include <mach/thread_act.h> | |
75 | #include <mach/task_special_ports.h> | |
76 | #include <mach/vm_param.h> | |
77 | #include <ipc/ipc_init.h> | |
78 | #include <kern/assert.h> | |
79 | #include <kern/mach_param.h> | |
80 | #include <kern/misc_protos.h> | |
81 | #include <kern/clock.h> | |
82 | #include <kern/coalition.h> | |
83 | #include <kern/cpu_number.h> | |
84 | #include <kern/cpu_quiesce.h> | |
85 | #include <kern/ledger.h> | |
86 | #include <kern/machine.h> | |
87 | #include <kern/processor.h> | |
88 | #include <kern/restartable.h> | |
89 | #include <kern/sched_prim.h> | |
90 | #include <kern/turnstile.h> | |
91 | #if CONFIG_SCHED_SFI | |
92 | #include <kern/sfi.h> | |
93 | #endif | |
94 | #include <kern/startup.h> | |
95 | #include <kern/task.h> | |
96 | #include <kern/thread.h> | |
97 | #include <kern/timer.h> | |
98 | #if CONFIG_TELEMETRY | |
99 | #include <kern/telemetry.h> | |
100 | #endif | |
101 | #include <kern/zalloc.h> | |
102 | #include <kern/locks.h> | |
103 | #include <kern/debug.h> | |
104 | #if KPERF | |
105 | #include <kperf/kperf.h> | |
106 | #endif /* KPERF */ | |
107 | #include <corpses/task_corpse.h> | |
108 | #include <prng/random.h> | |
109 | #include <console/serial_protos.h> | |
110 | #include <vm/vm_kern.h> | |
111 | #include <vm/vm_init.h> | |
112 | #include <vm/vm_map.h> | |
113 | #include <vm/vm_object.h> | |
114 | #include <vm/vm_page.h> | |
115 | #include <vm/vm_pageout.h> | |
116 | #include <vm/vm_shared_region.h> | |
117 | #include <machine/pmap.h> | |
118 | #include <machine/commpage.h> | |
119 | #include <machine/machine_routines.h> | |
120 | #include <libkern/version.h> | |
121 | #include <sys/codesign.h> | |
122 | #include <sys/kdebug.h> | |
123 | #include <sys/random.h> | |
124 | #include <sys/ktrace.h> | |
125 | #include <libkern/section_keywords.h> | |
126 | ||
127 | #include <kern/ltable.h> | |
128 | #include <kern/waitq.h> | |
129 | #include <ipc/ipc_voucher.h> | |
130 | #include <voucher/ipc_pthread_priority_internal.h> | |
131 | #include <mach/host_info.h> | |
132 | #include <pthread/workqueue_internal.h> | |
133 | ||
134 | #if CONFIG_XNUPOST | |
135 | #include <tests/ktest.h> | |
136 | #include <tests/xnupost.h> | |
137 | #endif | |
138 | ||
139 | #if CONFIG_ATM | |
140 | #include <atm/atm_internal.h> | |
141 | #endif | |
142 | ||
143 | #if CONFIG_CSR | |
144 | #include <sys/csr.h> | |
145 | #endif | |
146 | ||
147 | #include <bank/bank_internal.h> | |
148 | ||
149 | #if ALTERNATE_DEBUGGER | |
150 | #include <arm64/alternate_debugger.h> | |
151 | #endif | |
152 | ||
153 | #if MACH_KDP | |
154 | #include <kdp/kdp.h> | |
155 | #endif | |
156 | ||
157 | #if CONFIG_MACF | |
158 | #include <security/mac_mach_internal.h> | |
159 | #if CONFIG_VNGUARD | |
160 | extern void vnguard_policy_init(void); | |
161 | #endif | |
162 | #endif | |
163 | ||
164 | #if KPC | |
165 | #include <kern/kpc.h> | |
166 | #endif | |
167 | ||
168 | #if HYPERVISOR | |
169 | #include <kern/hv_support.h> | |
170 | #endif | |
171 | ||
172 | #include <san/kasan.h> | |
173 | ||
174 | #include <i386/pmCPU.h> | |
175 | static void kernel_bootstrap_thread(void); | |
176 | ||
177 | static void load_context( | |
178 | thread_t thread); | |
179 | ||
180 | #if CONFIG_ECC_LOGGING | |
181 | #include <kern/ecc.h> | |
182 | #endif | |
183 | ||
184 | #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX | |
185 | #include <i386/vmx/vmx_cpu.h> | |
186 | #endif | |
187 | ||
188 | #if CONFIG_DTRACE | |
189 | extern void dtrace_early_init(void); | |
190 | extern void sdt_early_init(void); | |
191 | #endif | |
192 | ||
193 | // libkern/OSKextLib.cpp | |
194 | extern void OSKextRemoveKextBootstrap(void); | |
195 | ||
196 | void scale_setup(void); | |
197 | extern void bsd_scale_setup(int); | |
198 | extern unsigned int semaphore_max; | |
199 | extern void stackshot_init(void); | |
200 | ||
/*
 * Running in virtual memory, on the interrupt stack.
 */

/*
 * Start and end markers of the __DATA segment section that the STARTUP()
 * macro populates with struct startup_entry records at link time.
 */
extern struct startup_entry startup_entries[]
__SECTION_START_SYM(STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION);

extern struct startup_entry startup_entries_end[]
__SECTION_END_SYM(STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION);

/* Cursor into the (sorted) startup_entries[]; advanced as hooks run. */
static struct startup_entry *__startup_data startup_entry_cur = startup_entries;

/* Highest startup subsystem that has fully initialized so far. */
SECURITY_READ_ONLY_LATE(startup_subsystem_id_t) startup_phase = STARTUP_SUB_NONE;

extern int serverperfmode;

#if DEBUG || DEVELOPMENT
/* "startup_debug" boot-arg: bitmask controlling verbose startup logging. */
TUNABLE(startup_debug_t, startup_debug, "startup_debug", 0);
#endif

/* size of kernel trace buffer, disabled by default */
TUNABLE(unsigned int, new_nkdbufs, "trace", 0);
TUNABLE(unsigned int, wake_nkdbufs, "trace_wake", 0);
TUNABLE(unsigned int, write_trace_on_panic, "trace_panic", 0);
TUNABLE(unsigned int, trace_wrap, "trace_wrap", 0);

/* mach leak logging */
TUNABLE(int, log_leaks, "-l", 0);
229 | ||
230 | static inline void | |
231 | kernel_bootstrap_log(const char *message) | |
232 | { | |
233 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && | |
234 | startup_phase >= STARTUP_SUB_KPRINTF) { | |
235 | kprintf("kernel_bootstrap: %s\n", message); | |
236 | } | |
237 | kernel_debug_string_early(message); | |
238 | } | |
239 | ||
240 | static inline void | |
241 | kernel_bootstrap_thread_log(const char *message) | |
242 | { | |
243 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && | |
244 | startup_phase >= STARTUP_SUB_KPRINTF) { | |
245 | kprintf("kernel_bootstrap_thread: %s\n", message); | |
246 | } | |
247 | kernel_debug_string_early(message); | |
248 | } | |
249 | ||
250 | extern void | |
251 | qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *)); | |
252 | ||
253 | __startup_func | |
254 | static int | |
255 | startup_entry_cmp(const void *e1, const void *e2) | |
256 | { | |
257 | const struct startup_entry *a = e1; | |
258 | const struct startup_entry *b = e2; | |
259 | if (a->subsystem == b->subsystem) { | |
260 | if (a->rank == b->rank) { | |
261 | return 0; | |
262 | } | |
263 | return a->rank > b->rank ? 1 : -1; | |
264 | } | |
265 | return a->subsystem > b->subsystem ? 1 : -1; | |
266 | } | |
267 | ||
268 | __startup_func | |
269 | void | |
270 | kernel_startup_bootstrap(void) | |
271 | { | |
272 | /* | |
273 | * Sort the various STARTUP() entries by subsystem/rank. | |
274 | */ | |
275 | size_t n = startup_entries_end - startup_entries; | |
276 | ||
277 | if (n == 0) { | |
278 | panic("Section %s,%s missing", | |
279 | STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); | |
280 | } | |
281 | if (((uintptr_t)startup_entries_end - (uintptr_t)startup_entries) % | |
282 | sizeof(struct startup_entry)) { | |
283 | panic("Section %s,%s has invalid size", | |
284 | STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); | |
285 | } | |
286 | ||
287 | qsort(startup_entries, n, sizeof(struct startup_entry), startup_entry_cmp); | |
288 | ||
289 | /* | |
290 | * Then initialize all tunables, and early locks | |
291 | */ | |
292 | kernel_startup_initialize_upto(STARTUP_SUB_LOCKS_EARLY); | |
293 | } | |
294 | ||
295 | __startup_func | |
296 | extern void | |
297 | kernel_startup_tunable_init(const struct startup_tunable_spec *spec) | |
298 | { | |
299 | if (PE_parse_boot_argn(spec->name, spec->var_addr, spec->var_len)) { | |
300 | if (spec->var_is_bool) { | |
301 | /* make sure bool's are valued in {0, 1} */ | |
302 | *(bool *)spec->var_addr = *(uint8_t *)spec->var_addr; | |
303 | } | |
304 | } | |
305 | } | |
306 | ||
307 | static void | |
308 | kernel_startup_log(startup_subsystem_id_t subsystem) | |
309 | { | |
310 | static const char *names[] = { | |
311 | [STARTUP_SUB_TUNABLES] = "tunables", | |
312 | [STARTUP_SUB_LOCKS_EARLY] = "locks_early", | |
313 | [STARTUP_SUB_KPRINTF] = "kprintf", | |
314 | ||
315 | [STARTUP_SUB_PMAP_STEAL] = "pmap_steal", | |
316 | [STARTUP_SUB_VM_KERNEL] = "vm_kernel", | |
317 | [STARTUP_SUB_KMEM] = "kmem", | |
318 | [STARTUP_SUB_KMEM_ALLOC] = "kmem_alloc", | |
319 | [STARTUP_SUB_ZALLOC] = "zalloc", | |
320 | [STARTUP_SUB_PERCPU] = "percpu", | |
321 | [STARTUP_SUB_LOCKS] = "locks", | |
322 | ||
323 | [STARTUP_SUB_CODESIGNING] = "codesigning", | |
324 | [STARTUP_SUB_OSLOG] = "oslog", | |
325 | [STARTUP_SUB_MACH_IPC] = "mach_ipc", | |
326 | [STARTUP_SUB_SYSCTL] = "sysctl", | |
327 | [STARTUP_SUB_EARLY_BOOT] = "early_boot", | |
328 | ||
329 | /* LOCKDOWN is special and its value won't fit here. */ | |
330 | }; | |
331 | static startup_subsystem_id_t logged = STARTUP_SUB_NONE; | |
332 | ||
333 | if (subsystem <= logged) { | |
334 | return; | |
335 | } | |
336 | ||
337 | if (subsystem < sizeof(names) / sizeof(names[0]) && names[subsystem]) { | |
338 | kernel_bootstrap_log(names[subsystem]); | |
339 | } | |
340 | logged = subsystem; | |
341 | } | |
342 | ||
/*
 * Run every pending STARTUP() hook whose subsystem id is <= `upto`,
 * in the (subsystem, rank) order established by kernel_startup_bootstrap(),
 * then advance startup_phase to `upto`.  Resumable: the cursor
 * startup_entry_cur persists across calls, so each hook runs exactly once.
 */
__startup_func
void
kernel_startup_initialize_upto(startup_subsystem_id_t upto)
{
	struct startup_entry *cur = startup_entry_cur;

	/* phases only ever move forward */
	assert(startup_phase < upto);

	while (cur < startup_entries_end && cur->subsystem <= upto) {
		if ((startup_debug & STARTUP_DEBUG_VERBOSE) &&
		    startup_phase >= STARTUP_SUB_KPRINTF) {
			kprintf("%s[%d, rank %d]: %p(%p)\n", __func__,
			    cur->subsystem, cur->rank, cur->func, cur->arg);
		}
		/*
		 * While hooks of subsystem N run, report phase N-1: subsystem
		 * N is not complete until all of its hooks have returned.
		 */
		startup_phase = cur->subsystem - 1;
		kernel_startup_log(cur->subsystem);
		cur->func(cur->arg);
		/* persist progress so a later call resumes after this hook */
		startup_entry_cur = ++cur;
	}
	kernel_startup_log(upto);

	if ((startup_debug & STARTUP_DEBUG_VERBOSE) &&
	    upto >= STARTUP_SUB_KPRINTF) {
		kprintf("%s: reached phase %d\n", __func__, upto);
	}
	startup_phase = upto;
}
370 | ||
/*
 * kernel_bootstrap:
 *
 * First C entry point of Mach startup proper (called on the interrupt
 * stack).  Brings up VM, console, scheduler, IPC, task/thread subsystems
 * in a carefully ordered sequence, then creates the kernel bootstrap
 * thread and switches onto it via load_context().  Does not return.
 */
void
kernel_bootstrap(void)
{
	kern_return_t   result;
	thread_t        thread;
	char            namep[16];

	printf("%s\n", version); /* log kernel version */

	scale_setup();

	kernel_bootstrap_log("vm_mem_bootstrap");
	vm_mem_bootstrap();

	machine_info.memory_size = (uint32_t)mem_size;
#if XNU_TARGET_OS_OSX
	machine_info.max_mem = max_mem_actual;
#else
	machine_info.max_mem = max_mem;
#endif /* XNU_TARGET_OS_OSX */
	machine_info.major_version = version_major;
	machine_info.minor_version = version_minor;

	/* run all STARTUP() hooks up to and including the oslog subsystem */
	kernel_startup_initialize_upto(STARTUP_SUB_OSLOG);

#if KASAN
	kernel_bootstrap_log("kasan_late_init");
	kasan_late_init();
#endif

#if CONFIG_TELEMETRY
	kernel_bootstrap_log("telemetry_init");
	telemetry_init();
#endif

	/* debug-only boot-args; honored only when a debugger is allowed */
	if (PE_i_can_has_debugger(NULL)) {
		if (PE_parse_boot_argn("-show_pointers", &namep, sizeof(namep))) {
			doprnt_hide_pointers = FALSE;
		}
		if (PE_parse_boot_argn("-no_slto_panic", &namep, sizeof(namep))) {
			extern boolean_t spinlock_timeout_panic;
			spinlock_timeout_panic = FALSE;
		}
	}

	kernel_bootstrap_log("console_init");
	console_init();

	kernel_bootstrap_log("stackshot_init");
	stackshot_init();

	kernel_bootstrap_log("sched_init");
	sched_init();

	kernel_bootstrap_log("waitq_bootstrap");
	waitq_bootstrap();

#if CONFIG_MACF
	kernel_bootstrap_log("mac_policy_init");
	mac_policy_init();
#endif

	kernel_startup_initialize_upto(STARTUP_SUB_MACH_IPC);

	/*
	 * As soon as the virtual memory system is up, we record
	 * that this CPU is using the kernel pmap.
	 */
	kernel_bootstrap_log("PMAP_ACTIVATE_KERNEL");
	PMAP_ACTIVATE_KERNEL(master_cpu);

	kernel_bootstrap_log("mapping_free_prime");
	mapping_free_prime();                                           /* Load up with temporary mapping blocks */

	kernel_bootstrap_log("machine_init");
	machine_init();

	kernel_bootstrap_log("thread_machine_init_template");
	thread_machine_init_template();

	kernel_bootstrap_log("clock_init");
	clock_init();

	/*
	 * Initialize the IPC, task, and thread subsystems.
	 */
#if CONFIG_THREAD_GROUPS
	kernel_bootstrap_log("thread_group_init");
	thread_group_init();
#endif

#if CONFIG_COALITIONS
	kernel_bootstrap_log("coalitions_init");
	coalitions_init();
#endif

	kernel_bootstrap_log("task_init");
	task_init();

	kernel_bootstrap_log("thread_init");
	thread_init();

	kernel_bootstrap_log("restartable_init");
	restartable_init();

	kernel_bootstrap_log("workq_init");
	workq_init();

	kernel_bootstrap_log("turnstiles_init");
	turnstiles_init();

#if CONFIG_ATM
	/* Initialize the Activity Trace Resource Manager. */
	kernel_bootstrap_log("atm_init");
	atm_init();
#endif
	kernel_bootstrap_log("mach_init_activity_id");
	mach_init_activity_id();

	/* Initialize the BANK Manager. */
	kernel_bootstrap_log("bank_init");
	bank_init();

	kernel_bootstrap_log("ipc_pthread_priority_init");
	ipc_pthread_priority_init();

	/* initialize the corpse config based on boot-args */
	corpses_init();

	/* initialize host_statistics */
	host_statistics_init();

	/* initialize exceptions */
	kernel_bootstrap_log("exception_init");
	exception_init();

#if CONFIG_SCHED_SFI
	kernel_bootstrap_log("sfi_init");
	sfi_init();
#endif

	/*
	 * Create a kernel thread to execute the kernel bootstrap.
	 */

	kernel_bootstrap_log("kernel_thread_create");
	result = kernel_thread_create((thread_continue_t)kernel_bootstrap_thread, NULL, MAXPRI_KERNEL, &thread);

	if (result != KERN_SUCCESS) {
		panic("kernel_bootstrap: result = %08X\n", result);
	}

	/* The static init_thread is re-used as the bootstrap thread */
	assert(thread == current_thread());

	/* TODO: do a proper thread_start() (without the thread_setrun()) */
	thread->state = TH_RUN;
	thread->last_made_runnable_time = mach_absolute_time();
	thread_set_thread_name(thread, "kernel_bootstrap_thread");

	/* drop the create reference; the thread stays alive as current_thread */
	thread_deallocate(thread);

	kernel_bootstrap_log("load_context - done");
	load_context(thread);
	/*NOTREACHED*/
}
537 | ||
/*
 * Random permutation/hash values used by VM_KERNEL_ADDRPERM() and friends
 * to obfuscate kernel addresses exported to userland.  Seeded once in
 * kernel_bootstrap_thread() and read-only thereafter.
 */
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_addrperm;
SECURITY_READ_ONLY_LATE(vm_offset_t) buf_kernel_addrperm;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_addrperm_ext;
SECURITY_READ_ONLY_LATE(uint64_t) vm_kernel_addrhash_salt;
SECURITY_READ_ONLY_LATE(uint64_t) vm_kernel_addrhash_salt_ext;
543 | ||
/*
 * Now running in a thread.  Kick off other services,
 * invoke user bootstrap, enter pageout loop.
 *
 * This is the continuation of kernel_bootstrap(): it completes scheduler,
 * device, tracing and security bring-up, runs bsd_init() for user-space
 * bootstrap, and finally becomes the pageout daemon.  Never returns.
 */
static void
kernel_bootstrap_thread(void)
{
	processor_t processor = current_processor();

	kernel_bootstrap_thread_log("idle_thread_create");
	/*
	 * Create the idle processor thread.
	 */
	idle_thread_create(processor);

	/*
	 * N.B. Do not stick anything else
	 * before this point.
	 *
	 * Start up the scheduler services.
	 */
	kernel_bootstrap_thread_log("sched_startup");
	sched_startup();

	/*
	 * Thread lifecycle maintenance (teardown, stack allocation)
	 */
	kernel_bootstrap_thread_log("thread_daemon_init");
	thread_daemon_init();

	/* Create kernel map entry reserve */
	vm_kernel_reserved_entry_init();

	/*
	 * Thread callout service.
	 */
	kernel_bootstrap_thread_log("thread_call_initialize");
	thread_call_initialize();

	/*
	 * Work interval subsystem initialization.
	 * Needs to be done once thread calls have been initialized.
	 */
	kernel_bootstrap_thread_log("work_interval_initialize");
	work_interval_subsystem_init();

	/*
	 * Remain on current processor as
	 * additional processors come online.
	 */
	kernel_bootstrap_thread_log("thread_bind");
	thread_bind(processor);

	/*
	 * Initialize ipc thread call support.
	 */
	kernel_bootstrap_thread_log("ipc_thread_call_init");
	ipc_thread_call_init();

	/*
	 * Kick off memory mapping adjustments.
	 */
	kernel_bootstrap_thread_log("mapping_adjust");
	mapping_adjust();

	/*
	 * Create the clock service.
	 */
	kernel_bootstrap_thread_log("clock_service_create");
	clock_service_create();

	/*
	 * Create the device service.
	 */
	device_service_create();

	phys_carveout_init();

#if MACH_KDP
	kernel_bootstrap_log("kdp_init");
	kdp_init();
#endif

#if ALTERNATE_DEBUGGER
	alternate_debugger_init();
#endif

#if KPC
	kpc_init();
#endif

#if HYPERVISOR
	kernel_bootstrap_thread_log("hv_support_init");
	hv_support_init();
#endif

#if CONFIG_TELEMETRY
	kernel_bootstrap_log("bootprofile_init");
	bootprofile_init();
#endif

	/* kernel tracing (kdebug) configuration from boot-args */
	char trace_typefilter[256] = {};
	PE_parse_boot_arg_str("trace_typefilter", trace_typefilter,
	    sizeof(trace_typefilter));
#if KPERF
	kperf_init();
#endif /* KPERF */
	kdebug_init(new_nkdbufs, trace_typefilter,
	    (trace_wrap ? KDOPT_WRAPPING : 0) | KDOPT_ATBOOT);

	kernel_startup_initialize_upto(STARTUP_SUB_SYSCTL);

#ifdef IOKIT
	kernel_bootstrap_log("PE_init_iokit");
	PE_init_iokit();
#endif

	assert(ml_get_interrupts_enabled() == FALSE);

	/*
	 * Past this point, kernel subsystems that expect to operate with
	 * interrupts or preemption enabled may begin enforcement.
	 */
	kernel_startup_initialize_upto(STARTUP_SUB_EARLY_BOOT);

#if INTERRUPT_MASKED_DEBUG
	// Reset interrupts masked timeout before we enable interrupts
	ml_spin_debug_clear_self();
#endif
	(void) spllo();         /* Allow interruptions */

	/*
	 * This will start displaying progress to the user, start as early as possible
	 */
	initialize_screen(NULL, kPEAcquireScreen);

	/*
	 * Initialize the shared region module.
	 */
	vm_commpage_init();
	vm_commpage_text_init();

#if CONFIG_MACF
	kernel_bootstrap_log("mac_policy_initmach");
	mac_policy_initmach();
#if CONFIG_VNGUARD
	kernel_bootstrap_log("vnguard_policy_init");
	vnguard_policy_init();
#endif
#endif

#if CONFIG_DTRACE
	kernel_bootstrap_log("dtrace_early_init");
	dtrace_early_init();
	sdt_early_init();
#endif

#ifndef BCM2837
	kernel_bootstrap_log("trust_cache_init");
	trust_cache_init();
#endif

	kernel_startup_initialize_upto(STARTUP_SUB_LOCKDOWN);

	/*
	 * Get rid of segments used to bootstrap kext loading. This removes
	 * the KLD, PRELINK symtab, LINKEDIT, and symtab segments/load commands.
	 * Must be done prior to lockdown so that we can free (and possibly relocate)
	 * the static KVA mappings used for the jettisoned bootstrap segments.
	 */
	kernel_bootstrap_log("OSKextRemoveKextBootstrap");
	OSKextRemoveKextBootstrap();

	/*
	 * Initialize the globals used for permuting kernel
	 * addresses that may be exported to userland as tokens
	 * using VM_KERNEL_ADDRPERM()/VM_KERNEL_ADDRPERM_EXTERNAL().
	 * Force the random number to be odd to avoid mapping a non-zero
	 * word-aligned address to zero via addition.
	 * Note: at this stage we can use the cryptographically secure PRNG
	 * rather than early_random().
	 */
	read_random(&vm_kernel_addrperm, sizeof(vm_kernel_addrperm));
	vm_kernel_addrperm |= 1;
	read_random(&buf_kernel_addrperm, sizeof(buf_kernel_addrperm));
	buf_kernel_addrperm |= 1;
	read_random(&vm_kernel_addrperm_ext, sizeof(vm_kernel_addrperm_ext));
	vm_kernel_addrperm_ext |= 1;
	read_random(&vm_kernel_addrhash_salt, sizeof(vm_kernel_addrhash_salt));
	read_random(&vm_kernel_addrhash_salt_ext, sizeof(vm_kernel_addrhash_salt_ext));

	/* No changes to kernel text and rodata beyond this point. */
	kernel_bootstrap_log("machine_lockdown");
	machine_lockdown();

#ifdef IOKIT
	kernel_bootstrap_log("PE_lockdown_iokit");
	PE_lockdown_iokit();
#endif
	/*
	 * max_cpus must be nailed down by the time PE_lockdown_iokit() finishes,
	 * at the latest
	 */
	vm_set_restrictions(machine_info.max_cpus);

	/*
	 * NOTE(review): uses #ifdef here while the include block above uses
	 * #if CONFIG_XNUPOST — confirm the macro is only defined when enabled.
	 */
#ifdef CONFIG_XNUPOST
	kern_return_t result = kernel_list_tests();
	result = kernel_do_post();
	if (result != KERN_SUCCESS) {
		panic("kernel_do_post: Tests failed with result = 0x%08x\n", result);
	}
	kernel_bootstrap_log("kernel_do_post - done");
#endif /* CONFIG_XNUPOST */


#if KPERF
	kperf_init_early();
#endif

	/*
	 * Start the user bootstrap.
	 */
#ifdef MACH_BSD
	bsd_init();
#endif


	/*
	 * Get rid of pages used for early boot tracing.
	 */
	kdebug_free_early_buf();

	serial_keyboard_init();         /* Start serial keyboard if wanted */

	vm_page_init_local_q(machine_info.max_cpus);

	/* release the binding established earlier; all CPUs may run us now */
	thread_bind(PROCESSOR_NULL);

	/*
	 * Now that all CPUs are available to run threads, this is essentially
	 * a background thread. Take this opportunity to initialize and free
	 * any remaining vm_pages that were delayed earlier by pmap_startup().
	 */
	vm_free_delayed_pages();

	/*
	 * Become the pageout daemon.
	 */
	vm_pageout();
	/*NOTREACHED*/
}
795 | ||
796 | /* | |
797 | * slave_main: | |
798 | * | |
799 | * Load the first thread to start a processor. | |
800 | * This path will also be used by the master processor | |
801 | * after being offlined. | |
802 | */ | |
803 | void | |
804 | slave_main(void *machine_param) | |
805 | { | |
806 | processor_t processor = current_processor(); | |
807 | thread_t thread; | |
808 | ||
809 | /* | |
810 | * Use the idle processor thread if there | |
811 | * is no dedicated start up thread. | |
812 | */ | |
813 | if (processor->processor_offlined == true) { | |
814 | /* Return to the saved processor_offline context */ | |
815 | assert(processor->startup_thread == THREAD_NULL); | |
816 | ||
817 | thread = processor->idle_thread; | |
818 | thread->parameter = machine_param; | |
819 | } else if (processor->startup_thread) { | |
820 | thread = processor->startup_thread; | |
821 | processor->startup_thread = THREAD_NULL; | |
822 | } else { | |
823 | thread = processor->idle_thread; | |
824 | thread->continuation = processor_start_thread; | |
825 | thread->parameter = machine_param; | |
826 | } | |
827 | ||
828 | load_context(thread); | |
829 | /*NOTREACHED*/ | |
830 | } | |
831 | ||
832 | /* | |
833 | * processor_start_thread: | |
834 | * | |
835 | * First thread to execute on a started processor. | |
836 | * | |
837 | * Called at splsched. | |
838 | */ | |
839 | void | |
840 | processor_start_thread(void *machine_param, | |
841 | __unused wait_result_t result) | |
842 | { | |
843 | processor_t processor = current_processor(); | |
844 | thread_t self = current_thread(); | |
845 | ||
846 | slave_machine_init(machine_param); | |
847 | ||
848 | /* | |
849 | * If running the idle processor thread, | |
850 | * reenter the idle loop, else terminate. | |
851 | */ | |
852 | if (self == processor->idle_thread) { | |
853 | thread_block(idle_thread); | |
854 | } | |
855 | ||
856 | thread_terminate(self); | |
857 | /*NOTREACHED*/ | |
858 | } | |
859 | ||
860 | /* | |
861 | * load_context: | |
862 | * | |
863 | * Start the first thread on a processor. | |
864 | * This may be the first thread ever run on a processor, or | |
865 | * it could be a processor that was previously offlined. | |
866 | */ | |
/*
 * Hand the current processor to `thread`: make it the active thread,
 * wire up per-processor accounting/timers, and jump into its machine
 * context.  Runs with interrupts disabled; never returns.
 */
static void __attribute__((noreturn))
load_context(
	thread_t                thread)
{
	processor_t             processor = current_processor();


/* debug tracing hook; compiled out by default */
#define load_context_kprintf(x...)      /* kprintf("load_context: " x) */

	load_context_kprintf("machine_set_current_thread\n");
	machine_set_current_thread(thread);

	load_context_kprintf("processor_up\n");

	PMAP_ACTIVATE_KERNEL(processor->cpu_id);

	/*
	 * Acquire a stack if none attached.  The panic
	 * should never occur since the thread is expected
	 * to have reserved stack.
	 */
	load_context_kprintf("thread %p, stack %lx, stackptr %lx\n", thread,
	    thread->kernel_stack, thread->machine.kstackptr);
	if (!thread->kernel_stack) {
		load_context_kprintf("stack_alloc_try\n");
		if (!stack_alloc_try(thread)) {
			panic("load_context");
		}
	}

	/*
	 * The idle processor threads are not counted as
	 * running for load calculations.
	 */
	if (!(thread->state & TH_IDLE)) {
		SCHED(run_count_incr)(thread);
	}

	/* Publish the thread as this processor's active thread and
	 * update scheduler-visible processor state to match it. */
	processor->active_thread = thread;
	processor_state_update_explicit(processor, thread->sched_pri,
	    SFI_CLASS_KERNEL, PSET_SMP, thread_get_perfcontrol_class(thread), THREAD_URGENCY_NONE,
	    ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket);
	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
	processor->current_is_NO_SMT = false;
	processor->current_is_eagerpreempt = false;
#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = thread_group_get(thread);
#endif
	processor->starting_pri = thread->sched_pri;
	processor->deadline = UINT64_MAX;
	thread->last_processor = processor;
	processor_up(processor);
	/* start time accounting for the thread and the processor */
	processor->last_dispatch = mach_absolute_time();
	timer_start(&thread->system_timer, processor->last_dispatch);
	processor->thread_timer = processor->kernel_timer = &thread->system_timer;

	timer_start(&processor->system_state, processor->last_dispatch);
	processor->current_state = &processor->system_state;

#if __AMP__
	/* P-cluster CPUs additionally account performance-core time */
	if (processor->processor_set->pset_cluster_type == PSET_AMP_P) {
		timer_start(&thread->ptime, processor->last_dispatch);
	}
#endif

	cpu_quiescent_counter_join(processor->last_dispatch);

	PMAP_ACTIVATE_USER(thread, processor->cpu_id);

	load_context_kprintf("machine_load_context\n");

	machine_load_context(thread);
	/*NOTREACHED*/
}
941 | ||
942 | void | |
943 | scale_setup(void) | |
944 | { | |
945 | int scale = 0; | |
946 | #if defined(__LP64__) | |
947 | typeof(task_max) task_max_base = task_max; | |
948 | ||
949 | /* Raise limits for servers with >= 16G */ | |
950 | if ((serverperfmode != 0) && ((uint64_t)max_mem_actual >= (uint64_t)(16 * 1024 * 1024 * 1024ULL))) { | |
951 | scale = (int)((uint64_t)sane_size / (uint64_t)(8 * 1024 * 1024 * 1024ULL)); | |
952 | /* limit to 128 G */ | |
953 | if (scale > 16) { | |
954 | scale = 16; | |
955 | } | |
956 | task_max_base = 2500; | |
957 | /* Raise limits for machines with >= 3GB */ | |
958 | } else if ((uint64_t)max_mem_actual >= (uint64_t)(3 * 1024 * 1024 * 1024ULL)) { | |
959 | if ((uint64_t)max_mem_actual < (uint64_t)(8 * 1024 * 1024 * 1024ULL)) { | |
960 | scale = 2; | |
961 | } else { | |
962 | /* limit to 64GB */ | |
963 | scale = MIN(16, (int)((uint64_t)max_mem_actual / (uint64_t)(4 * 1024 * 1024 * 1024ULL))); | |
964 | } | |
965 | } | |
966 | ||
967 | task_max = MAX(task_max, task_max_base * scale); | |
968 | ||
969 | if (scale != 0) { | |
970 | task_threadmax = task_max; | |
971 | thread_max = task_max * 5; | |
972 | } | |
973 | ||
974 | #endif | |
975 | ||
976 | bsd_scale_setup(scale); | |
977 | } |