]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/arm/arm_init.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / osfmk / arm / arm_init.c
index cf490691536474765aebdff9e8fdfc65e12b41e7..8fadbf4e74dede629a19723a4bdd0e78ab80ca80 100644 (file)
 #include <prng/random.h>
 #include <machine/machine_routines.h>
 #include <machine/commpage.h>
+#if HIBERNATION
+#include <machine/pal_hibernate.h>
+#endif /* HIBERNATION */
 /* ARM64_TODO unify boot.h */
 #if __arm64__
+#include <pexpert/arm64/apple_arm64_common.h>
 #include <pexpert/arm64/boot.h>
 #elif __arm__
 #include <pexpert/arm/boot.h>
@@ -57,6 +61,8 @@
 #include <arm/caches_internal.h>
 #include <arm/cpu_internal.h>
 #include <arm/cpu_data_internal.h>
+#include <arm/cpuid_internal.h>
+#include <arm/io_map_entries.h>
 #include <arm/misc_protos.h>
 #include <arm/machine_cpu.h>
 #include <arm/rtclock.h>
@@ -66,6 +72,7 @@
 #include <libkern/stack_protector.h>
 #include <libkern/section_keywords.h>
 #include <san/kasan.h>
+#include <sys/kdebug.h>
 
 #include <pexpert/pexpert.h>
 
 #include <kern/monotonic.h>
 #endif /* MONOTONIC */
 
+#if HIBERNATION
+#include <IOKit/IOPlatformExpert.h>
+#endif /* HIBERNATION */
+
 extern void     patch_low_glo(void);
 extern int      serial_init(void);
 extern void sleep_token_buffer_init(void);
@@ -97,19 +108,34 @@ int             pc_trace_buf[PC_TRACE_BUF_SIZE] = {0};
 int             pc_trace_cnt = PC_TRACE_BUF_SIZE;
 int             debug_task;
 
-boolean_t up_style_idle_exit = 0;
-
+bool need_wa_rdar_55577508 = false;
+SECURITY_READ_ONLY_LATE(bool) static_kernelcache = false;
 
+#if HAS_BP_RET
+/* Enable both branch target retention (0x2) and branch direction retention (0x1) across sleep */
+uint32_t bp_ret = 3;
+extern void set_bp_ret(void);
+#endif
 
 #if INTERRUPT_MASKED_DEBUG
 boolean_t interrupt_masked_debug = 1;
+/* the following are in mach timebase units */
 uint64_t interrupt_masked_timeout = 0xd0000;
+uint64_t stackshot_interrupt_masked_timeout = 0xf9999;
 #endif
 
+/*
+ * A 6-second timeout will give the watchdog code a chance to run
+ * before a panic is triggered by the xcall routine.
+ */
+#define XCALL_ACK_TIMEOUT_NS ((uint64_t) 6000000000)
+uint64_t xcall_ack_timeout_abstime;
+
+
 boot_args const_boot_args __attribute__((section("__DATA, __const")));
 boot_args      *BootArgs __attribute__((section("__DATA, __const")));
 
-unsigned int arm_diag;
+TUNABLE(uint32_t, arm_diag, "diag", 0);
 #ifdef  APPLETYPHOON
 static unsigned cpus_defeatures = 0x0;
 extern void cpu_defeatures_set(unsigned int);
@@ -121,6 +147,15 @@ extern volatile boolean_t arm64_stall_sleep;
 
 extern boolean_t force_immediate_debug_halt;
 
+#if HAS_APPLE_PAC
+SECURITY_READ_ONLY_LATE(boolean_t) diversify_user_jop = TRUE;
+#endif
+
+SECURITY_READ_ONLY_LATE(uint64_t) gDramBase;
+SECURITY_READ_ONLY_LATE(uint64_t) gDramSize;
+
+SECURITY_READ_ONLY_LATE(bool) serial_console_enabled = false;
+
 /*
  * Forward definition
  */
@@ -128,6 +163,9 @@ void arm_init(boot_args * args);
 
 #if __arm64__
 unsigned int page_shift_user32; /* for page_size as seen by a 32-bit task */
+
+extern void configure_misc_apple_boot_args(void);
+extern void configure_misc_apple_regs(void);
 #endif /* __arm64__ */
 
 
@@ -135,69 +173,133 @@ unsigned int page_shift_user32; /* for page_size as seen by a 32-bit task */
  * JOP rebasing
  */
 
+#define dyldLogFunc(msg, ...)
+#include <mach/dyld_kernel_fixups.h>
+
+extern uint32_t __thread_starts_sect_start[] __asm("section$start$__TEXT$__thread_starts");
+extern uint32_t __thread_starts_sect_end[]   __asm("section$end$__TEXT$__thread_starts");
+#if defined(HAS_APPLE_PAC)
+extern void OSRuntimeSignStructors(kernel_mach_header_t * header);
+extern void OSRuntimeSignStructorsInFileset(kernel_mach_header_t * header);
+#endif /* defined(HAS_APPLE_PAC) */
+
+extern vm_offset_t vm_kernel_slide;
+extern vm_offset_t segLOWESTKC, segHIGHESTKC, segLOWESTROKC, segHIGHESTROKC;
+extern vm_offset_t segLOWESTAuxKC, segHIGHESTAuxKC, segLOWESTROAuxKC, segHIGHESTROAuxKC;
+extern vm_offset_t segLOWESTRXAuxKC, segHIGHESTRXAuxKC, segHIGHESTNLEAuxKC;
 
-// Note, the following should come from a header from dyld
 static void
-rebase_chain(uintptr_t chainStartAddress, uint64_t stepMultiplier, uintptr_t baseAddress __unused, uint64_t slide)
+arm_slide_rebase_and_sign_image(void)
 {
-       uint64_t delta = 0;
-       uintptr_t address = chainStartAddress;
-       do {
-               uint64_t value = *(uint64_t*)address;
-
-               bool isAuthenticated = (value & (1ULL << 63)) != 0;
-               bool isRebase = (value & (1ULL << 62)) == 0;
-               if (isRebase) {
-                       if (isAuthenticated) {
-                               // The new value for a rebase is the low 32-bits of the threaded value plus the slide.
-                               uint64_t newValue = (value & 0xFFFFFFFF) + slide;
-                               // Add in the offset from the mach_header
-                               newValue += baseAddress;
-                               *(uint64_t*)address = newValue;
-                       } else {
-                               // Regular pointer which needs to fit in 51-bits of value.
-                               // C++ RTTI uses the top bit, so we'll allow the whole top-byte
-                               // and the bottom 43-bits to be fit in to 51-bits.
-                               uint64_t top8Bits = value & 0x0007F80000000000ULL;
-                               uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
-                               uint64_t targetValue = (top8Bits << 13) | (((intptr_t)(bottom43Bits << 21) >> 21) & 0x00FFFFFFFFFFFFFF);
-                               targetValue = targetValue + slide;
-                               *(uint64_t*)address = targetValue;
-                       }
+       kernel_mach_header_t *k_mh, *kc_mh = NULL;
+       kernel_segment_command_t *seg;
+       uintptr_t slide;
+
+       k_mh = &_mh_execute_header;
+       if (kernel_mach_header_is_in_fileset(k_mh)) {
+               /*
+                * The kernel is part of a MH_FILESET kernel collection, determine slide
+                * based on first segment's mach-o vmaddr (requires first kernel load
+                * command to be LC_SEGMENT_64 of the __TEXT segment)
+                */
+               seg = (kernel_segment_command_t *)((uintptr_t)k_mh + sizeof(*k_mh));
+               assert(seg->cmd == LC_SEGMENT_KERNEL);
+               slide = (uintptr_t)k_mh - seg->vmaddr;
+
+               /*
+                * The kernel collection linker guarantees that the boot collection mach
+                * header vmaddr is the hardcoded kernel link address (as specified to
+                * ld64 when linking the kernel).
+                */
+               kc_mh = (kernel_mach_header_t*)(VM_KERNEL_LINK_ADDRESS + slide);
+               assert(kc_mh->filetype == MH_FILESET);
+
+               /*
+                * rebase and sign jops
+                * Note that we can't call any functions before this point, so
+                * we have to hard-code the knowledge that the base of the KC
+                * is the KC's mach-o header. This would change if any
+                * segment's VA started *before* the text segment
+                * (as the HIB segment does on x86).
+                */
+               const void *collection_base_pointers[KCNumKinds] = {[0] = kc_mh, };
+               kernel_collection_slide((struct mach_header_64 *)kc_mh, collection_base_pointers);
+
+               PE_set_kc_header(KCKindPrimary, kc_mh, slide);
+
+               /*
+                * iBoot doesn't slide load command vmaddrs in an MH_FILESET kernel
+                * collection, so adjust them now, and determine the vmaddr range
+                * covered by read-only segments for the CTRR rorgn.
+                */
+               kernel_collection_adjust_mh_addrs((struct mach_header_64 *)kc_mh, slide, false,
+                   (uintptr_t *)&segLOWESTKC, (uintptr_t *)&segHIGHESTKC,
+                   (uintptr_t *)&segLOWESTROKC, (uintptr_t *)&segHIGHESTROKC,
+                   NULL, NULL, NULL);
+#if defined(HAS_APPLE_PAC)
+               OSRuntimeSignStructorsInFileset(kc_mh);
+#endif /* defined(HAS_APPLE_PAC) */
+       } else {
+               /*
+                * Static kernelcache: iBoot slid kernel MachO vmaddrs, determine slide
+                * using hardcoded kernel link address
+                */
+               slide = (uintptr_t)k_mh - VM_KERNEL_LINK_ADDRESS;
+
+               /* rebase and sign jops */
+               static_kernelcache = &__thread_starts_sect_end[0] != &__thread_starts_sect_start[0];
+               if (static_kernelcache) {
+                       rebase_threaded_starts( &__thread_starts_sect_start[0],
+                           &__thread_starts_sect_end[0],
+                           (uintptr_t)k_mh, (uintptr_t)k_mh - slide, slide);
                }
+#if defined(HAS_APPLE_PAC)
+               OSRuntimeSignStructors(&_mh_execute_header);
+#endif /* defined(HAS_APPLE_PAC) */
+       }
+
 
-               // The delta is bits [51..61]
-               // And bit 62 is to tell us if we are a rebase (0) or bind (1)
-               value &= ~(1ULL << 62);
-               delta = (value & 0x3FF8000000000000) >> 51;
-               address += delta * stepMultiplier;
-       } while (delta != 0);
+       /*
+        * Initialize slide global here to avoid duplicating this logic in
+        * arm_vm_init()
+        */
+       vm_kernel_slide = slide;
 }
 
-// Note, the following method should come from a header from dyld
-static bool
-rebase_threaded_starts(uint32_t *threadArrayStart, uint32_t *threadArrayEnd,
-    uintptr_t macho_header_addr, uintptr_t macho_header_vmaddr, size_t slide)
+void
+arm_auxkc_init(void *mh, void *base)
 {
-       uint32_t threadStartsHeader = *threadArrayStart;
-       uint64_t stepMultiplier = (threadStartsHeader & 1) == 1 ? 8 : 4;
-       for (uint32_t* threadOffset = threadArrayStart + 1; threadOffset != threadArrayEnd; ++threadOffset) {
-               if (*threadOffset == 0xFFFFFFFF) {
-                       break;
-               }
-               rebase_chain(macho_header_addr + *threadOffset, stepMultiplier, macho_header_vmaddr, slide);
-       }
-       return true;
+       /*
+        * The kernel collection linker guarantees that the lowest vmaddr in an
+        * AuxKC collection is 0 (but note that the mach header is higher up since
+        * RW segments precede RO segments in the AuxKC).
+        */
+       uintptr_t slide = (uintptr_t)base;
+       kernel_mach_header_t *akc_mh = (kernel_mach_header_t*)mh;
+
+       assert(akc_mh->filetype == MH_FILESET);
+       PE_set_kc_header_and_base(KCKindAuxiliary, akc_mh, base, slide);
+
+       /* rebase and sign jops */
+       const void *collection_base_pointers[KCNumKinds];
+       memcpy(collection_base_pointers, PE_get_kc_base_pointers(), sizeof(collection_base_pointers));
+       kernel_collection_slide((struct mach_header_64 *)akc_mh, collection_base_pointers);
+
+       kernel_collection_adjust_mh_addrs((struct mach_header_64 *)akc_mh, slide, false,
+           (uintptr_t *)&segLOWESTAuxKC, (uintptr_t *)&segHIGHESTAuxKC, (uintptr_t *)&segLOWESTROAuxKC,
+           (uintptr_t *)&segHIGHESTROAuxKC, (uintptr_t *)&segLOWESTRXAuxKC, (uintptr_t *)&segHIGHESTRXAuxKC,
+           (uintptr_t *)&segHIGHESTNLEAuxKC);
+#if defined(HAS_APPLE_PAC)
+       OSRuntimeSignStructorsInFileset(akc_mh);
+#endif /* defined(HAS_APPLE_PAC) */
 }
 
 /*
  *             Routine:                arm_init
- *             Function:
+ *             Function:               Runs on the boot CPU, once, on entry from iBoot.
  */
 
-extern uint32_t __thread_starts_sect_start[] __asm("section$start$__TEXT$__thread_starts");
-extern uint32_t __thread_starts_sect_end[]   __asm("section$end$__TEXT$__thread_starts");
-
+__startup_func
 void
 arm_init(
        boot_args       *args)
@@ -206,76 +308,50 @@ arm_init(
        uint32_t        memsize;
        uint64_t        xmaxmem;
        thread_t        thread;
-       processor_t     my_master_proc;
-
-       // rebase and sign jops
-       if (&__thread_starts_sect_end[0] != &__thread_starts_sect_start[0]) {
-               uintptr_t mh    = (uintptr_t) &_mh_execute_header;
-               uintptr_t slide = mh - VM_KERNEL_LINK_ADDRESS;
-               rebase_threaded_starts( &__thread_starts_sect_start[0],
-                   &__thread_starts_sect_end[0],
-                   mh, mh - slide, slide);
-       }
+
+       arm_slide_rebase_and_sign_image();
 
        /* If kernel integrity is supported, use a constant copy of the boot args. */
        const_boot_args = *args;
        BootArgs = args = &const_boot_args;
 
        cpu_data_init(&BootCpuData);
+#if defined(HAS_APPLE_PAC)
+       /* bootstrap cpu process dependent key for kernel has been loaded by start.s */
+       BootCpuData.rop_key = ml_default_rop_pid();
+       BootCpuData.jop_key = ml_default_jop_pid();
+#endif /* defined(HAS_APPLE_PAC) */
 
        PE_init_platform(FALSE, args); /* Get platform expert set up */
 
 #if __arm64__
+       wfe_timeout_configure();
 
+       configure_misc_apple_boot_args();
+       configure_misc_apple_regs();
 
-       {
-               unsigned int    tmp_16k = 0;
 
-#ifdef  XXXX
+       {
                /*
-                * Select the advertised kernel page size; without the boot-arg
-                * we default to the hardware page size for the current platform.
+                * Select the advertised kernel page size.
                 */
-               if (PE_parse_boot_argn("-vm16k", &tmp_16k, sizeof(tmp_16k))) {
+               if (args->memSize > 1ULL * 1024 * 1024 * 1024) {
+                       /*
+                        * arm64 device with > 1GB of RAM:
+                        * kernel uses 16KB pages.
+                        */
                        PAGE_SHIFT_CONST = PAGE_MAX_SHIFT;
                } else {
+                       /*
+                        * arm64 device with <= 1GB of RAM:
+                        * kernel uses hardware page size
+                        * (4KB for H6/H7, 16KB for H8+).
+                        */
                        PAGE_SHIFT_CONST = ARM_PGSHIFT;
                }
-#else
-               /*
-                * Select the advertised kernel page size; with the boot-arg
-                * use to the hardware page size for the current platform.
-                */
-               int radar_20804515 = 1; /* default: new mode */
-               PE_parse_boot_argn("radar_20804515", &radar_20804515, sizeof(radar_20804515));
-               if (radar_20804515) {
-                       if (args->memSize > 1ULL * 1024 * 1024 * 1024) {
-                               /*
-                                * arm64 device with > 1GB of RAM:
-                                * kernel uses 16KB pages.
-                                */
-                               PAGE_SHIFT_CONST = PAGE_MAX_SHIFT;
-                       } else {
-                               /*
-                                * arm64 device with <= 1GB of RAM:
-                                * kernel uses hardware page size
-                                * (4KB for H6/H7, 16KB for H8+).
-                                */
-                               PAGE_SHIFT_CONST = ARM_PGSHIFT;
-                       }
-                       /* 32-bit apps always see 16KB page size */
-                       page_shift_user32 = PAGE_MAX_SHIFT;
-               } else {
-                       /* kernel page size: */
-                       if (PE_parse_boot_argn("-use_hwpagesize", &tmp_16k, sizeof(tmp_16k))) {
-                               PAGE_SHIFT_CONST = ARM_PGSHIFT;
-                       } else {
-                               PAGE_SHIFT_CONST = PAGE_MAX_SHIFT;
-                       }
-                       /* old mode: 32-bit apps see same page size as kernel */
-                       page_shift_user32 = PAGE_SHIFT_CONST;
-               }
-#endif
+
+               /* 32-bit apps always see 16KB page size */
+               page_shift_user32 = PAGE_MAX_SHIFT;
 #ifdef  APPLETYPHOON
                if (PE_parse_boot_argn("cpus_defeatures", &cpus_defeatures, sizeof(cpus_defeatures))) {
                        if ((cpus_defeatures & 0xF) != 0) {
@@ -304,15 +380,17 @@ arm_init(
        BootCpuData.fiqstack_top = (vm_offset_t) &fiqstack_top;
        BootCpuData.fiqstackptr = BootCpuData.fiqstack_top;
 #endif
-       BootCpuData.cpu_processor = cpu_processor_alloc(TRUE);
        BootCpuData.cpu_console_buf = (void *)NULL;
        CpuDataEntries[master_cpu].cpu_data_vaddr = &BootCpuData;
        CpuDataEntries[master_cpu].cpu_data_paddr = (void *)((uintptr_t)(args->physBase)
            + ((uintptr_t)&BootCpuData
            - (uintptr_t)(args->virtBase)));
 
-       thread_bootstrap();
-       thread = current_thread();
+       thread = thread_bootstrap();
+       thread->machine.CpuDatap = &BootCpuData;
+       thread->machine.pcpu_data_base = (vm_offset_t)0;
+       machine_set_current_thread(thread);
+
        /*
         * Preemption is enabled for this thread so that it can lock mutexes without
         * tripping the preemption check. In reality scheduling is not enabled until
@@ -320,35 +398,34 @@ arm_init(
         * preemption level is not really meaningful for the bootstrap thread.
         */
        thread->machine.preemption_count = 0;
-       thread->machine.CpuDatap = &BootCpuData;
 #if     __arm__ && __ARM_USER_PROTECT__
        {
-               unsigned int ttbr0_val, ttbr1_val, ttbcr_val;
+               unsigned int ttbr0_val, ttbr1_val;
                __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
                __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
-               __asm__ volatile ("mrc p15,0,%0,c2,c0,2\n" : "=r"(ttbcr_val));
                thread->machine.uptw_ttb = ttbr0_val;
                thread->machine.kptw_ttb = ttbr1_val;
-               thread->machine.uptw_ttc = ttbcr_val;
        }
 #endif
-       BootCpuData.cpu_processor->processor_data.kernel_timer = &thread->system_timer;
-       BootCpuData.cpu_processor->processor_data.thread_timer = &thread->system_timer;
+       processor_t boot_processor = PERCPU_GET_MASTER(processor);
+       boot_processor->kernel_timer = &thread->system_timer;
+       boot_processor->thread_timer = &thread->system_timer;
 
        cpu_bootstrap();
 
        rtclock_early_init();
 
-       kernel_early_bootstrap();
+       kernel_debug_string_early("kernel_startup_bootstrap");
+       kernel_startup_bootstrap();
 
-       cpu_init();
+       /*
+        * Initialize the timer callout world
+        */
+       timer_call_init();
 
-       EntropyData.index_ptr = EntropyData.buffer;
+       cpu_init();
 
        processor_bootstrap();
-       my_master_proc = master_processor;
-
-       (void)PE_parse_boot_argn("diag", &arm_diag, sizeof(arm_diag));
 
        if (PE_parse_boot_argn("maxmem", &maxmem, sizeof(maxmem))) {
                xmaxmem = (uint64_t) maxmem * (1024 * 1024);
@@ -358,23 +435,27 @@ arm_init(
                xmaxmem = 0;
        }
 
-       if (PE_parse_boot_argn("up_style_idle_exit", &up_style_idle_exit, sizeof(up_style_idle_exit))) {
-               up_style_idle_exit = 1;
-       }
 #if INTERRUPT_MASKED_DEBUG
        int wdt_boot_arg = 0;
        /* Disable if WDT is disabled or no_interrupt_mask_debug in boot-args */
        if (PE_parse_boot_argn("no_interrupt_masked_debug", &interrupt_masked_debug,
            sizeof(interrupt_masked_debug)) || (PE_parse_boot_argn("wdt", &wdt_boot_arg,
-           sizeof(wdt_boot_arg)) && (wdt_boot_arg == -1))) {
+           sizeof(wdt_boot_arg)) && (wdt_boot_arg == -1)) || kern_feature_override(KF_INTERRUPT_MASKED_DEBUG_OVRD)) {
                interrupt_masked_debug = 0;
        }
 
        PE_parse_boot_argn("interrupt_masked_debug_timeout", &interrupt_masked_timeout, sizeof(interrupt_masked_timeout));
-#endif
 
+#endif /* INTERRUPT_MASKED_DEBUG */
+
+       nanoseconds_to_absolutetime(XCALL_ACK_TIMEOUT_NS, &xcall_ack_timeout_abstime);
 
 
+#if HAS_BP_RET
+       PE_parse_boot_argn("bpret", &bp_ret, sizeof(bp_ret));
+       set_bp_ret(); // Apply branch predictor retention settings to boot CPU
+#endif
+
        PE_parse_boot_argn("immediate_NMI", &force_immediate_debug_halt, sizeof(force_immediate_debug_halt));
 
 #if __ARM_PAN_AVAILABLE__
@@ -383,33 +464,35 @@ arm_init(
 
        arm_vm_init(xmaxmem, args);
 
-       uint32_t debugmode;
-       if (PE_parse_boot_argn("debug", &debugmode, sizeof(debugmode)) &&
-           debugmode) {
+       if (debug_boot_arg) {
                patch_low_glo();
        }
 
-       printf_init();
-       panic_init();
-#if __arm64__
-       /* Enable asynchronous exceptions */
-       __builtin_arm_wsr("DAIFClr", DAIFSC_ASYNCF);
-#endif
 #if __arm64__ && WITH_CLASSIC_S2R
        sleep_token_buffer_init();
 #endif
 
        PE_consistent_debug_inherit();
 
-       /* setup debugging output if one has been chosen */
-       PE_init_kprintf(FALSE);
+       /*
+        * rdar://54622819 Insufficient HSP purge window can cause incorrect translation when ASID and TTBR base address are changed at the same time.
+        * (original info on HSP purge window issues can be found in rdar://55577508)
+        * We need a flag to check for this, so calculate and set it here. We'll use it in machine_switch_amx_context().
+        */
+#if __arm64__
+       need_wa_rdar_55577508 = cpuid_get_cpufamily() == CPUFAMILY_ARM_LIGHTNING_THUNDER;
+#ifndef RC_HIDE_XNU_FIRESTORM
+       need_wa_rdar_55577508 |= (cpuid_get_cpufamily() == CPUFAMILY_ARM_FIRESTORM_ICESTORM && get_arm_cpu_version() == CPU_VERSION_A0);
+#endif
+#endif
 
+       /* setup debugging output if one has been chosen */
+       kernel_startup_initialize_upto(STARTUP_SUB_KPRINTF);
        kprintf("kprintf initialized\n");
 
-       serialmode = 0;                                                      /* Assume normal keyboard and console */
-       if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) { /* Do we want a serial
-                                                                             * keyboard and/or
-                                                                             * console? */
+       serialmode = 0;
+       if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
+               /* Do we want a serial keyboard and/or console? */
                kprintf("Serial mode specified: %08X\n", serialmode);
                int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
                if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
@@ -427,6 +510,7 @@ arm_init(
        }
 
        if (serialmode & SERIALMODE_OUTPUT) {                 /* Start serial if requested */
+               serial_console_enabled = true;
                (void)switch_to_serial_console(); /* Switch into serial mode */
                disableConsoleOutput = FALSE;     /* Allow printfs to happen */
        }
@@ -450,9 +534,43 @@ arm_init(
 #endif
 
        PE_init_platform(TRUE, &BootCpuData);
+
+#if __arm64__
+       ml_map_cpu_pio();
+#endif
+
        cpu_timebase_init(TRUE);
-       fiq_context_bootstrap(TRUE);
+       PE_init_cpu();
+       fiq_context_init(TRUE);
+
+
+#if HIBERNATION
+       pal_hib_init();
+#endif /* HIBERNATION */
+
+       /*
+        * gPhysBase/Size only represent kernel-managed memory. These globals represent
+        * the actual DRAM base address and size as reported by iBoot through the
+        * device tree.
+        */
+       DTEntry chosen;
+       unsigned int dt_entry_size;
+       unsigned long const *dram_base;
+       unsigned long const *dram_size;
+       if (SecureDTLookupEntry(NULL, "/chosen", &chosen) != kSuccess) {
+               panic("%s: Unable to find 'chosen' DT node", __FUNCTION__);
+       }
+
+       if (SecureDTGetProperty(chosen, "dram-base", (void const **)&dram_base, &dt_entry_size) != kSuccess) {
+               panic("%s: Unable to find 'dram-base' entry in the 'chosen' DT node", __FUNCTION__);
+       }
+
+       if (SecureDTGetProperty(chosen, "dram-size", (void const **)&dram_size, &dt_entry_size) != kSuccess) {
+               panic("%s: Unable to find 'dram-size' entry in the 'chosen' DT node", __FUNCTION__);
+       }
 
+       gDramBase = *dram_base;
+       gDramSize = *dram_size;
 
        /*
         * Initialize the stack protector for all future calls
@@ -472,7 +590,7 @@ arm_init(
 /*
  * Routine:        arm_init_cpu
  * Function:
- *    Re-initialize CPU when coming out of reset
+ *    Runs on S2R resume (all CPUs) and SMP boot (non-boot CPUs only).
  */
 
 void
@@ -483,18 +601,44 @@ arm_init_cpu(
        __builtin_arm_wsr("pan", 1);
 #endif
 
+#ifdef __arm64__
+       configure_misc_apple_regs();
+#endif
+
        cpu_data_ptr->cpu_flags &= ~SleepState;
-#if     __ARM_SMP__ && defined(ARMA7)
+#if     defined(ARMA7)
        cpu_data_ptr->cpu_CLW_active = 1;
 #endif
 
        machine_set_current_thread(cpu_data_ptr->cpu_active_thread);
 
+#if HIBERNATION
+       if ((cpu_data_ptr == &BootCpuData) && (gIOHibernateState == kIOHibernateStateWakingFromHibernate)) {
+               // the "normal" S2R code captures wake_abstime too early, so on a hibernation resume we fix it up here
+               extern uint64_t wake_abstime;
+               wake_abstime = gIOHibernateCurrentHeader->lastHibAbsTime;
+
+               // since the hw clock stops ticking across hibernation, we need to apply an offset;
+               // iBoot computes this offset for us and passes it via the hibernation header
+               extern uint64_t hwclock_conttime_offset;
+               hwclock_conttime_offset = gIOHibernateCurrentHeader->hwClockOffset;
+
+               // during hibernation, we captured the idle thread's state from inside the PPL context, so we have to
+               // fix up its preemption count
+               unsigned int expected_preemption_count = (gEnforceQuiesceSafety ? 2 : 1);
+               if (cpu_data_ptr->cpu_active_thread->machine.preemption_count != expected_preemption_count) {
+                       panic("unexpected preemption count %u on boot cpu thread (should be %u)\n",
+                           cpu_data_ptr->cpu_active_thread->machine.preemption_count,
+                           expected_preemption_count);
+               }
+               cpu_data_ptr->cpu_active_thread->machine.preemption_count--;
+       }
+#endif /* HIBERNATION */
+
 #if __arm64__
+       wfe_timeout_init();
        pmap_clear_user_ttb();
        flush_mmu_tlb();
-       /* Enable asynchronous exceptions */
-       __builtin_arm_wsr("DAIFClr", DAIFSC_ASYNCF);
 #endif
 
        cpu_machine_idle_init(FALSE);
@@ -528,6 +672,7 @@ arm_init_cpu(
                PE_init_platform(TRUE, NULL);
                commpage_update_timebase();
        }
+       PE_init_cpu();
 
        fiq_context_init(TRUE);
        cpu_data_ptr->rtcPop = EndOfAllTime;
@@ -537,9 +682,15 @@ arm_init_cpu(
        PE_arm_debug_enable_trace();
 #endif
 
-       kprintf("arm_cpu_init(): cpu %d online\n", cpu_data_ptr->cpu_processor->cpu_id);
+
+       kprintf("arm_cpu_init(): cpu %d online\n", cpu_data_ptr->cpu_number);
 
        if (cpu_data_ptr == &BootCpuData) {
+               if (kdebug_enable == 0) {
+                       __kdebug_only uint64_t elapsed = kdebug_wake();
+                       KDBG(IOKDBG_CODE(DBG_HIBERNATE, 15), mach_absolute_time() - elapsed);
+               }
+
 #if CONFIG_TELEMETRY
                bootprofile_wake_from_sleep();
 #endif /* CONFIG_TELEMETRY */
@@ -548,13 +699,21 @@ arm_init_cpu(
        mt_wake_per_core();
 #endif /* MONOTONIC && defined(__arm64__) */
 
+#if defined(KERNEL_INTEGRITY_CTRR)
+       if (ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] != CTRR_LOCKED) {
+               lck_spin_lock(&ctrr_cpu_start_lck);
+               ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] = CTRR_LOCKED;
+               thread_wakeup(&ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id]);
+               lck_spin_unlock(&ctrr_cpu_start_lck);
+       }
+#endif
 
        slave_main(NULL);
 }
 
 /*
- * Routine:        arm_init_idle_cpu
- * Function:
+ * Routine:            arm_init_idle_cpu
+ * Function:   Resume from non-retention WFI.  Called from the reset vector.
  */
 void __attribute__((noreturn))
 arm_init_idle_cpu(
@@ -563,13 +722,14 @@ arm_init_idle_cpu(
 #if __ARM_PAN_AVAILABLE__
        __builtin_arm_wsr("pan", 1);
 #endif
-#if     __ARM_SMP__ && defined(ARMA7)
+#if     defined(ARMA7)
        cpu_data_ptr->cpu_CLW_active = 1;
 #endif
 
        machine_set_current_thread(cpu_data_ptr->cpu_active_thread);
 
 #if __arm64__
+       wfe_timeout_init();
        pmap_clear_user_ttb();
        flush_mmu_tlb();
        /* Enable asynchronous exceptions */