X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/55e303ae13a4cf49d70f2294092726f2fffb9ef2..2dced7af2b695f87fe26496a3e73c219b7880cbc:/osfmk/kern/startup.c diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index f4256a9ad..53013fa79 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -1,16 +1,19 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -20,7 +23,7 @@ * Please see the License for the specific language governing rights and * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -50,6 +53,12 @@ * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ +/* + * NOTICE: This file was modified by McAfee Research in 2004 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ /* */ @@ -60,189 +69,374 @@ #include #include #include -#include -#include -#include -#include #include #include +#include #include #include #include #include +#include #include #include +#include #include -#include +#include #include #include #include -#include +#if CONFIG_SCHED_SFI +#include +#endif #include #include #include #include -#include +#if CONFIG_TELEMETRY +#include +#endif #include #include -#include +#include +#include +#include +#include +#include #include #include #include #include #include #include +#include #include #include -#include +#include +#include +#include +#include + +#include + + +#if CONFIG_ATM +#include +#endif + +#if CONFIG_CSR +#include +#endif + +#if CONFIG_BANK +#include +#endif + +#if ALTERNATE_DEBUGGER +#include +#endif -#ifdef __ppc__ -#include -#include +#if MACH_KDP +#include #endif -/* Externs XXX */ -extern void rtclock_reset(void); +#if CONFIG_MACF +#include +#endif -/* Forwards */ -void cpu_launch_first_thread( - thread_t thread); -void start_kernel_threads(void); +#if KPC +#include +#endif + +#if KPERF +#include +#endif + +#if HYPERVISOR +#include +#endif + + +#include +static void kernel_bootstrap_thread(void); + +static void load_context( + thread_t thread); +#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 +extern void cpu_userwindow_init(int); +extern void cpu_physwindow_init(int); +#endif + +#if CONFIG_ECC_LOGGING +#include +#endif + +#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX +#include +#endif + +// libkern/OSKextLib.cpp +extern void OSKextRemoveKextBootstrap(void); + +void scale_setup(void); +extern void bsd_scale_setup(int); +extern unsigned int semaphore_max; +extern void stackshot_lock_init(void); /* * Running in virtual memory, on the interrupt stack. - * Does not return. Dispatches initial thread. - * - * Assumes that master_cpu is set. */ + +extern int serverperfmode; + +/* size of kernel trace buffer, disabled by default */ +unsigned int new_nkdbufs = 0; +unsigned int wake_nkdbufs = 0; +unsigned int write_trace_on_panic = 0; +unsigned int trace_typefilter = 0; +boolean_t trace_serial = FALSE; + +/* mach leak logging */ +int log_leaks = 0; +int turn_on_log_leaks = 0; + +static inline void +kernel_bootstrap_log(const char *message) +{ +// kprintf("kernel_bootstrap: %s\n", message); + kernel_debug_string_simple(message); +} + +static inline void +kernel_bootstrap_thread_log(const char *message) +{ +// kprintf("kernel_bootstrap_thread: %s\n", message); + kernel_debug_string_simple(message); +} + void -setup_main(void) +kernel_early_bootstrap(void) { - thread_t startup_thread; + /* serverperfmode is needed by timer setup */ + if (PE_parse_boot_argn("serverperfmode", &serverperfmode, sizeof (serverperfmode))) { + serverperfmode = 1; + } - sched_init(); + lck_mod_init(); + + /* + * Initialize the timer callout world + */ + timer_call_init(); + +#if CONFIG_SCHED_SFI + /* + * Configure SFI classes + */ + sfi_early_init(); +#endif +} + +extern boolean_t IORamDiskBSDRoot(void); +extern kern_return_t cpm_preallocate_early(void); + +void +kernel_bootstrap(void) +{ + kern_return_t result; + thread_t thread; + char namep[16]; + + printf("%s\n", version); /* log kernel version */ + + if (PE_parse_boot_argn("-l", namep, sizeof (namep))) /* leaks logging */ + turn_on_log_leaks = 1; + + PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs)); + PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof (wake_nkdbufs)); + PE_parse_boot_argn("trace_panic", &write_trace_on_panic, sizeof(write_trace_on_panic)); + PE_parse_boot_argn("trace_typefilter", &trace_typefilter, sizeof(trace_typefilter)); + + scale_setup(); + + kernel_bootstrap_log("vm_mem_bootstrap"); vm_mem_bootstrap(); - ipc_bootstrap(); + + kernel_bootstrap_log("cs_init"); + cs_init(); + + kernel_bootstrap_log("vm_mem_init"); vm_mem_init(); + + machine_info.memory_size = (uint32_t)mem_size; + machine_info.max_mem = max_mem; + machine_info.major_version = version_major; + machine_info.minor_version = version_minor; + + +#if CONFIG_TELEMETRY + kernel_bootstrap_log("telemetry_init"); + telemetry_init(); +#endif + +#if CONFIG_CSR + kernel_bootstrap_log("csr_init"); + csr_init(); +#endif + + kernel_bootstrap_log("stackshot_lock_init"); + stackshot_lock_init(); + + kernel_bootstrap_log("sched_init"); + sched_init(); + + kernel_bootstrap_log("waitq_bootstrap"); + waitq_bootstrap(); + + kernel_bootstrap_log("ipc_bootstrap"); + ipc_bootstrap(); + +#if CONFIG_MACF + kernel_bootstrap_log("mac_policy_init"); + mac_policy_init(); +#endif + + kernel_bootstrap_log("ipc_init"); ipc_init(); /* * As soon as the virtual memory system is up, we record * that this CPU is using the kernel pmap. */ + kernel_bootstrap_log("PMAP_ACTIVATE_KERNEL"); PMAP_ACTIVATE_KERNEL(master_cpu); -#ifdef __ppc__ + kernel_bootstrap_log("mapping_free_prime"); mapping_free_prime(); /* Load up with temporary mapping blocks */ -#endif + kernel_bootstrap_log("machine_init"); machine_init(); - kmod_init(); - clock_init(); - init_timers(); - timer_call_initialize(); + kernel_bootstrap_log("clock_init"); + clock_init(); - machine_info.max_cpus = NCPUS; - machine_info.memory_size = mem_size; - machine_info.avail_cpus = 0; - machine_info.major_version = KERNEL_MAJOR_VERSION; - machine_info.minor_version = KERNEL_MINOR_VERSION; + ledger_init(); /* * Initialize the IPC, task, and thread subsystems. */ - ledger_init(); +#if CONFIG_COALITIONS + kernel_bootstrap_log("coalitions_init"); + coalitions_init(); +#endif + + kernel_bootstrap_log("task_init"); task_init(); + + kernel_bootstrap_log("thread_init"); thread_init(); - /* - * Initialize the Event Trace Analysis Package. - * Dynamic Phase: 2 of 2 - */ - etap_init_phase2(); +#if CONFIG_ATM + /* Initialize the Activity Trace Resource Manager. */ + kernel_bootstrap_log("atm_init"); + atm_init(); +#endif + +#if CONFIG_BANK + /* Initialize the BANK Manager. */ + kernel_bootstrap_log("bank_init"); + bank_init(); +#endif - /* - * Create a kernel thread to start the other kernel - * threads. - */ - startup_thread = kernel_thread_create(start_kernel_threads, MAXPRI_KERNEL); + /* initialize the corpse config based on boot-args */ + corpses_init(); /* - * Start the thread. + * Create a kernel thread to execute the kernel bootstrap. */ - startup_thread->state = TH_RUN; - pset_run_incr(startup_thread->processor_set); + kernel_bootstrap_log("kernel_thread_create"); + result = kernel_thread_create((thread_continue_t)kernel_bootstrap_thread, NULL, MAXPRI_KERNEL, &thread); + + if (result != KERN_SUCCESS) panic("kernel_bootstrap: result = %08X\n", result); + + thread->state = TH_RUN; + thread->last_made_runnable_time = mach_absolute_time(); + thread_deallocate(thread); - cpu_launch_first_thread(startup_thread); + kernel_bootstrap_log("load_context - done"); + load_context(thread); /*NOTREACHED*/ - panic("cpu_launch_first_thread returns!"); } +int kth_started = 0; + +vm_offset_t vm_kernel_addrperm; +vm_offset_t buf_kernel_addrperm; +vm_offset_t vm_kernel_addrperm_ext; + /* - * Now running in a thread. Create the rest of the kernel threads - * and the bootstrap task. + * Now running in a thread. Kick off other services, + * invoke user bootstrap, enter pageout loop. */ -void -start_kernel_threads(void) +static void +kernel_bootstrap_thread(void) { - register int i; + processor_t processor = current_processor(); - thread_bind(current_thread(), cpu_to_processor(cpu_number())); +#define kernel_bootstrap_thread_kprintf(x...) /* kprintf("kernel_bootstrap_thread: " x) */ + kernel_bootstrap_thread_log("idle_thread_create"); + /* + * Create the idle processor thread. + */ + idle_thread_create(processor); /* - * Create the idle threads and the other - * service threads. + * N.B. Do not stick anything else + * before this point. + * + * Start up the scheduler services. */ - for (i = 0; i < NCPUS; i++) { - processor_t processor = cpu_to_processor(i); - thread_t thread; - spl_t s; - - thread = kernel_thread_create(idle_thread, MAXPRI_KERNEL); - - s = splsched(); - thread_lock(thread); - thread->bound_processor = processor; - processor->idle_thread = thread; - thread->ref_count++; - thread->sched_pri = thread->priority = IDLEPRI; - thread->state = (TH_RUN | TH_IDLE); - thread_unlock(thread); - splx(s); - } + kernel_bootstrap_thread_log("sched_startup"); + sched_startup(); /* - * Initialize the thread reaper mechanism. + * Thread lifecycle maintenance (teardown, stack allocation) */ - thread_reaper_init(); + kernel_bootstrap_thread_log("thread_daemon_init"); + thread_daemon_init(); + + /* Create kernel map entry reserve */ + vm_kernel_reserved_entry_init(); /* - * Initialize the stack swapin mechanism. + * Thread callout service. */ - swapin_init(); + kernel_bootstrap_thread_log("thread_call_initialize"); + thread_call_initialize(); /* - * Initialize the periodic scheduler mechanism. + * Remain on current processor as + * additional processors come online. */ - sched_tick_init(); + kernel_bootstrap_thread_log("thread_bind"); + thread_bind(processor); /* - * Initialize the thread callout mechanism. + * Initialize ipc thread call support. */ - thread_call_initialize(); + kernel_bootstrap_thread_log("ipc_thread_call_init"); + ipc_thread_call_init(); /* - * Invoke some black magic. + * Kick off memory mapping adjustments. */ -#if __ppc__ + kernel_bootstrap_thread_log("mapping_adjust"); mapping_adjust(); -#endif /* * Create the clock service. */ + kernel_bootstrap_thread_log("clock_service_create"); clock_service_create(); /* @@ -250,105 +444,315 @@ start_kernel_threads(void) */ device_service_create(); - shared_file_boot_time_init(ENV_DEFAULT_ROOT, machine_slot[cpu_number()].cpu_type); + kth_started = 1; + +#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 + /* + * Create and initialize the physical copy window for processor 0 + * This is required before starting kicking off IOKit. + */ + cpu_physwindow_init(0); +#endif -#ifdef IOKIT - { - PE_init_iokit(); + + +#if MACH_KDP + kernel_bootstrap_log("kdp_init"); + kdp_init(); +#endif + +#if ALTERNATE_DEBUGGER + alternate_debugger_init(); +#endif + +#if KPC + kpc_init(); +#endif + +#if CONFIG_ECC_LOGGING + ecc_log_init(); +#endif + +#if KPERF + kperf_bootstrap(); +#endif + +#if HYPERVISOR + hv_support_init(); +#endif + +#if CONFIG_TELEMETRY + kernel_bootstrap_log("bootprofile_init"); + bootprofile_init(); +#endif + +#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX + vmx_init(); +#endif + +#if (defined(__i386__) || defined(__x86_64__)) + if (kdebug_serial) { + new_nkdbufs = 1; + if (trace_typefilter == 0) + trace_typefilter = 1; } + if (turn_on_log_leaks && !new_nkdbufs) + new_nkdbufs = 200000; + if (trace_typefilter) + start_kern_tracing_with_typefilter(new_nkdbufs, + FALSE, + trace_typefilter); + else + start_kern_tracing(new_nkdbufs, FALSE); + if (turn_on_log_leaks) + log_leaks = 1; + #endif - + + kernel_bootstrap_log("prng_init"); + prng_cpu_init(master_cpu); + +#ifdef IOKIT + PE_init_iokit(); +#endif + + assert(ml_get_interrupts_enabled() == FALSE); (void) spllo(); /* Allow interruptions */ - /* - * Fill in the comm area (mapped into every task address space.) - */ - commpage_populate(); +#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 + /* + * Create and initialize the copy window for processor 0 + * This also allocates window space for all other processors. + * However, this is dependent on the number of processors - so this call + * must be after IOKit has been started because IOKit performs processor + * discovery. + */ + cpu_userwindow_init(0); +#endif + +#if (!defined(__i386__) && !defined(__x86_64__)) + if (turn_on_log_leaks && !new_nkdbufs) + new_nkdbufs = 200000; + if (trace_typefilter) + start_kern_tracing_with_typefilter(new_nkdbufs, FALSE, trace_typefilter); + else + start_kern_tracing(new_nkdbufs, FALSE); + if (turn_on_log_leaks) + log_leaks = 1; +#endif + + /* + * Initialize the shared region module. + */ + vm_shared_region_init(); + vm_commpage_init(); + vm_commpage_text_init(); + + +#if CONFIG_MACF + kernel_bootstrap_log("mac_policy_initmach"); + mac_policy_initmach(); +#endif + +#if CONFIG_SCHED_SFI + kernel_bootstrap_log("sfi_init"); + sfi_init(); +#endif + + /* + * Initialize the globals used for permuting kernel + * addresses that may be exported to userland as tokens + * using VM_KERNEL_ADDRPERM()/VM_KERNEL_ADDRPERM_EXTERNAL(). + * Force the random number to be odd to avoid mapping a non-zero + * word-aligned address to zero via addition. + * Note: at this stage we can use the cryptographically secure PRNG + * rather than early_random(). + */ + read_random(&vm_kernel_addrperm, sizeof(vm_kernel_addrperm)); + vm_kernel_addrperm |= 1; + read_random(&buf_kernel_addrperm, sizeof(buf_kernel_addrperm)); + buf_kernel_addrperm |= 1; + read_random(&vm_kernel_addrperm_ext, sizeof(vm_kernel_addrperm_ext)); + vm_kernel_addrperm_ext |= 1; + + vm_set_restrictions(); + + /* * Start the user bootstrap. */ - #ifdef MACH_BSD - { - extern void bsd_init(void); - bsd_init(); - } + bsd_init(); #endif -#if __ppc__ + /* + * Get rid of segments used to bootstrap kext loading. This removes + * the KLD, PRELINK symtab, LINKEDIT, and symtab segments/load commands. + */ + OSKextRemoveKextBootstrap(); + serial_keyboard_init(); /* Start serial keyboard if wanted */ -#endif - thread_bind(current_thread(), PROCESSOR_NULL); + vm_page_init_local_q(); + + thread_bind(PROCESSOR_NULL); /* * Become the pageout daemon. */ - vm_pageout(); /*NOTREACHED*/ } +/* + * slave_main: + * + * Load the first thread to start a processor. + */ void -slave_main(void) +slave_main(void *machine_param) { - processor_t myprocessor = current_processor(); + processor_t processor = current_processor(); thread_t thread; - thread = myprocessor->next_thread; - myprocessor->next_thread = THREAD_NULL; - if (thread == THREAD_NULL) { - thread = machine_wake_thread; - machine_wake_thread = THREAD_NULL; + /* + * Use the idle processor thread if there + * is no dedicated start up thread. + */ + if (processor->next_thread == THREAD_NULL) { + thread = processor->idle_thread; + thread->continuation = (thread_continue_t)processor_start_thread; + thread->parameter = machine_param; + } + else { + thread = processor->next_thread; + processor->next_thread = THREAD_NULL; } - cpu_launch_first_thread(thread); + load_context(thread); /*NOTREACHED*/ - panic("slave_main"); } /* - * Now running in a thread context + * processor_start_thread: + * + * First thread to execute on a started processor. + * + * Called at splsched. */ void -start_cpu_thread(void) +processor_start_thread(void *machine_param) { - slave_machine_init(); + processor_t processor = current_processor(); + thread_t self = current_thread(); - (void) thread_terminate(current_act()); + slave_machine_init(machine_param); + + /* + * If running the idle processor thread, + * reenter the idle loop, else terminate. + */ + if (self == processor->idle_thread) + thread_block((thread_continue_t)idle_thread); + + thread_terminate(self); + /*NOTREACHED*/ } /* - * Start up the first thread on a CPU. + * load_context: + * + * Start the first thread on a processor. */ -void -cpu_launch_first_thread( +static void +load_context( thread_t thread) { - register int mycpu = cpu_number(); - processor_t processor = cpu_to_processor(mycpu); + processor_t processor = current_processor(); - clock_get_uptime(&processor->last_dispatch); - start_timer(&kernel_timer[mycpu]); - machine_thread_set_current(thread); - cpu_up(mycpu); - rtclock_reset(); /* start realtime clock ticking */ - PMAP_ACTIVATE_KERNEL(mycpu); +#define load_context_kprintf(x...) /* kprintf("load_context: " x) */ + + load_context_kprintf("machine_set_current_thread\n"); + machine_set_current_thread(thread); + + load_context_kprintf("processor_up\n"); + processor_up(processor); + + PMAP_ACTIVATE_KERNEL(processor->cpu_id); + + /* + * Acquire a stack if none attached. The panic + * should never occur since the thread is expected + * to have reserved stack. + */ + load_context_kprintf("thread %p, stack %lx, stackptr %lx\n", thread, + thread->kernel_stack, thread->machine.kstackptr); + if (!thread->kernel_stack) { + load_context_kprintf("stack_alloc_try\n"); + if (!stack_alloc_try(thread)) + panic("load_context"); + } + + /* + * The idle processor threads are not counted as + * running for load calculations. + */ + if (!(thread->state & TH_IDLE)) + sched_run_incr(thread); - thread_lock(thread); - thread->state &= ~TH_UNINT; - thread->last_processor = processor; processor->active_thread = thread; processor->current_pri = thread->sched_pri; - _mk_sp_thread_begin(thread, processor); - thread_unlock(thread); - timer_switch(&thread->system_timer); + processor->current_thmode = thread->sched_mode; + processor->deadline = UINT64_MAX; + thread->last_processor = processor; - PMAP_ACTIVATE_USER(thread->top_act, mycpu); + processor->last_dispatch = mach_absolute_time(); + timer_start(&thread->system_timer, processor->last_dispatch); + PROCESSOR_DATA(processor, thread_timer) = PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; - /* preemption enabled by load_context */ + timer_start(&PROCESSOR_DATA(processor, system_state), processor->last_dispatch); + PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state); + + PMAP_ACTIVATE_USER(thread, processor->cpu_id); + + load_context_kprintf("machine_load_context\n"); machine_load_context(thread); /*NOTREACHED*/ } + +void +scale_setup() +{ + int scale = 0; +#if defined(__LP64__) + typeof(task_max) task_max_base = task_max; + + /* Raise limits for servers with >= 16G */ + if ((serverperfmode != 0) && ((uint64_t)sane_size >= (uint64_t)(16 * 1024 * 1024 *1024ULL))) { + scale = (int)((uint64_t)sane_size / (uint64_t)(8 * 1024 * 1024 *1024ULL)); + /* limit to 128 G */ + if (scale > 16) + scale = 16; + task_max_base = 2500; + } else if ((uint64_t)sane_size >= (uint64_t)(3 * 1024 * 1024 *1024ULL)) + scale = 2; + + task_max = MAX(task_max, task_max_base * scale); + + if (scale != 0) { + task_threadmax = task_max; + thread_max = task_max * 5; + } + +#endif + + bsd_scale_setup(scale); + + ipc_space_max = SPACE_MAX; + ipc_port_max = PORT_MAX; + ipc_pset_max = SET_MAX; + semaphore_max = SEMAPHORE_MAX; +} +