X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..HEAD:/osfmk/i386/machine_routines.c diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index 4ffb4ddc3..7d0e21dac 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -33,130 +33,244 @@ #include #include #include -#include + #include #include -#include +#include +#include + +#include +#include #include -#include +#include +#include #include -#include -#include #include -#include +#include #include #include +#include #include -#if MACH_KDB -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#if KPC +#include #endif - +#include +#include #if DEBUG -#define DBG(x...) kprintf("DBG: " x) +#define DBG(x...) kprintf("DBG: " x) #else #define DBG(x...) 
#endif -extern thread_t Shutdown_context(thread_t thread, void (*doshutdown)(processor_t),processor_t processor); -extern void wakeup(void *); -extern unsigned KernelRelocOffset; +#if MONOTONIC +#include +#endif /* MONOTONIC */ -static int max_cpus_initialized = 0; +extern void wakeup(void *); + +uint64_t LockTimeOut; +uint64_t TLBTimeOut; +uint64_t LockTimeOutTSC; +uint32_t LockTimeOutUsec; +uint64_t MutexSpin; +uint64_t low_MutexSpin; +int64_t high_MutexSpin; +uint64_t LastDebuggerEntryAllowance; +uint64_t delay_spin_threshold; + +extern uint64_t panic_restart_timeout; + +boolean_t virtualized = FALSE; -unsigned int LockTimeOut; -unsigned int LockTimeOutTSC; -unsigned int MutexSpin; +decl_simple_lock_data(static, ml_timer_evaluation_slock); +uint32_t ml_timer_eager_evaluations; +uint64_t ml_timer_eager_evaluation_max; +static boolean_t ml_timer_evaluation_in_progress = FALSE; +LCK_GRP_DECLARE(max_cpus_grp, "max_cpus"); +LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp); +static int max_cpus_initialized = 0; #define MAX_CPUS_SET 0x1 #define MAX_CPUS_WAIT 0x2 /* IO memory map services */ /* Map memory map IO space */ -vm_offset_t ml_io_map( - vm_offset_t phys_addr, +vm_offset_t +ml_io_map( + vm_offset_t phys_addr, vm_size_t size) { - return(io_map(phys_addr,size,VM_WIMG_IO)); + return io_map(phys_addr, size, VM_WIMG_IO); } /* boot memory allocation */ -vm_offset_t ml_static_malloc( - __unused vm_size_t size) +vm_offset_t +ml_static_malloc( + __unused vm_size_t size) { - return((vm_offset_t)NULL); + return (vm_offset_t)NULL; } -void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size) +void +ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size) { - *phys_addr = bounce_pool_base; - *size = bounce_pool_size; + *phys_addr = 0; + *size = 0; } vm_offset_t -ml_boot_ptovirt( +ml_static_ptovirt( vm_offset_t paddr) { - return (vm_offset_t)((paddr-KernelRelocOffset) | LINEAR_KERNEL_ADDRESS); -} +#if defined(__x86_64__) + return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS); +#else + return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS); +#endif +} vm_offset_t -ml_static_ptovirt( - vm_offset_t paddr) +ml_static_slide( + vm_offset_t vaddr) { - return (vm_offset_t)((unsigned) paddr | LINEAR_KERNEL_ADDRESS); -} + return VM_KERNEL_SLIDE(vaddr); +} + +/* + * base must be page-aligned, and size must be a multiple of PAGE_SIZE + */ +kern_return_t +ml_static_verify_page_protections( + uint64_t base, uint64_t size, vm_prot_t prot) +{ + vm_prot_t pageprot; + uint64_t offset; + + DBG("ml_static_verify_page_protections: vaddr 0x%llx sz 0x%llx prot 0x%x\n", base, size, prot); + + /* + * base must be within the static bounds, defined to be: + * (vm_kernel_stext, kc_highest_nonlinkedit_vmaddr) + */ +#if DEVELOPMENT || DEBUG || KASAN + assert(kc_highest_nonlinkedit_vmaddr > 0 && base > vm_kernel_stext && base < kc_highest_nonlinkedit_vmaddr); +#else /* On release kernels, assume this is a protection mismatch failure. 
*/ + if (kc_highest_nonlinkedit_vmaddr == 0 || base < vm_kernel_stext || base >= kc_highest_nonlinkedit_vmaddr) { + return KERN_FAILURE; + } +#endif + + for (offset = 0; offset < size; offset += PAGE_SIZE) { + if (pmap_get_prot(kernel_pmap, base + offset, &pageprot) == KERN_FAILURE) { + return KERN_FAILURE; + } + if ((pageprot & prot) != prot) { + return KERN_FAILURE; + } + } + + return KERN_SUCCESS; +} +vm_offset_t +ml_static_unslide( + vm_offset_t vaddr) +{ + return VM_KERNEL_UNSLIDE(vaddr); +} /* - * Routine: ml_static_mfree - * Function: + * Reclaim memory, by virtual address, that was used in early boot that is no longer needed + * by the kernel. */ void ml_static_mfree( vm_offset_t vaddr, vm_size_t size) { - vm_offset_t vaddr_cur; + addr64_t vaddr_cur; ppnum_t ppn; + uint32_t freed_pages = 0; + vm_size_t map_size; + + assert(vaddr >= VM_MIN_KERNEL_ADDRESS); -// if (vaddr < VM_MIN_KERNEL_ADDRESS) return; + assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */ - assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */ + for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) { + map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur); - for (vaddr_cur = vaddr; - vaddr_cur < round_page_32(vaddr+size); - vaddr_cur += PAGE_SIZE) { - ppn = pmap_find_phys(kernel_pmap, (addr64_t)vaddr_cur); - if (ppn != (vm_offset_t)NULL) { - kernel_pmap->stats.resident_count++; - if (kernel_pmap->stats.resident_count > - kernel_pmap->stats.resident_max) { - kernel_pmap->stats.resident_max = - kernel_pmap->stats.resident_count; + /* just skip if nothing mapped here */ + if (map_size == 0) { + vaddr_cur += PAGE_SIZE; + continue; + } + + /* + * Can't free from the middle of a large page. + */ + assert((vaddr_cur & (map_size - 1)) == 0); + + ppn = pmap_find_phys(kernel_pmap, vaddr_cur); + assert(ppn != (ppnum_t)NULL); + + pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size); + while (map_size > 0) { + if (++kernel_pmap->stats.resident_count > kernel_pmap->stats.resident_max) { + kernel_pmap->stats.resident_max = kernel_pmap->stats.resident_count; } - pmap_remove(kernel_pmap, (addr64_t)vaddr_cur, (addr64_t)(vaddr_cur+PAGE_SIZE)); - vm_page_create(ppn,(ppn+1)); - vm_page_wire_count--; + + assert(pmap_valid_page(ppn)); + if (IS_MANAGED_PAGE(ppn)) { + vm_page_create(ppn, (ppn + 1)); + freed_pages++; + } + map_size -= PAGE_SIZE; + vaddr_cur += PAGE_SIZE; + ppn++; } } + vm_page_lockspin_queues(); + vm_page_wire_count -= freed_pages; + vm_page_wire_count_initial -= freed_pages; + if (vm_page_wire_count_on_boot != 0) { + assert(vm_page_wire_count_on_boot >= freed_pages); + vm_page_wire_count_on_boot -= freed_pages; + } + vm_page_unlock_queues(); + +#if DEBUG + kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn); +#endif } +/* Change page protections for addresses previously loaded by efiboot */ +kern_return_t +ml_static_protect(vm_offset_t vmaddr, vm_size_t size, vm_prot_t prot) +{ + boolean_t NX = !!!(prot & VM_PROT_EXECUTE), ro = !!!(prot & VM_PROT_WRITE); + + assert(prot & VM_PROT_READ); + + pmap_mark_range(kernel_pmap, vmaddr, size, NX, ro); + + return KERN_SUCCESS; +} /* virtual to physical on wired pages */ -vm_offset_t ml_vtophys( +vm_offset_t +ml_vtophys( vm_offset_t vaddr) { - return kvtophys(vaddr); + return (vm_offset_t)kvtophys(vaddr); } /* @@ -169,24 +283,30 @@ vm_offset_t ml_vtophys( * the duration of the copy process. 
*/ -vm_size_t ml_nofault_copy( +vm_size_t +ml_nofault_copy( vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size) { addr64_t cur_phys_dst, cur_phys_src; uint32_t count, nbytes = 0; while (size > 0) { - if (!(cur_phys_src = kvtophys(virtsrc))) + if (!(cur_phys_src = kvtophys(virtsrc))) { break; - if (!(cur_phys_dst = kvtophys(virtdst))) + } + if (!(cur_phys_dst = kvtophys(virtdst))) { break; - if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) + } + if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) { break; - count = PAGE_SIZE - (cur_phys_src & PAGE_MASK); - if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) - count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK); - if (count > size) - count = size; + } + count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); + if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) { + count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK)); + } + if (count > size) { + count = (uint32_t)size; + } bcopy_phys(cur_phys_src, cur_phys_dst, count); @@ -199,160 +319,205 @@ vm_size_t ml_nofault_copy( return nbytes; } +/* + * Routine: ml_validate_nofault + * Function: Validate that ths address range has a valid translations + * in the kernel pmap. If translations are present, they are + * assumed to be wired; i.e. no attempt is made to guarantee + * that the translation persist after the check. + * Returns: TRUE if the range is mapped and will not cause a fault, + * FALSE otherwise. + */ + +boolean_t +ml_validate_nofault( + vm_offset_t virtsrc, vm_size_t size) +{ + addr64_t cur_phys_src; + uint32_t count; + + while (size > 0) { + if (!(cur_phys_src = kvtophys(virtsrc))) { + return FALSE; + } + if (!pmap_valid_page(i386_btop(cur_phys_src))) { + return FALSE; + } + count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); + if (count > size) { + count = (uint32_t)size; + } + + virtsrc += count; + size -= count; + } + + return TRUE; +} + /* Interrupt handling */ /* Initialize Interrupts */ -void ml_init_interrupt(void) +void +ml_init_interrupt(void) { (void) ml_set_interrupts_enabled(TRUE); } + /* Get Interrupts Enabled */ -boolean_t ml_get_interrupts_enabled(void) +boolean_t +ml_get_interrupts_enabled(void) { - unsigned long flags; + unsigned long flags; - __asm__ volatile("pushf; popl %0" : "=r" (flags)); - return (flags & EFL_IF) != 0; + __asm__ volatile ("pushf; pop %0": "=r" (flags)); + return (flags & EFL_IF) != 0; } /* Set Interrupts Enabled */ -boolean_t ml_set_interrupts_enabled(boolean_t enable) +boolean_t +ml_set_interrupts_enabled(boolean_t enable) { - unsigned long flags; + unsigned long flags; + boolean_t istate; + + __asm__ volatile ("pushf; pop %0" : "=r" (flags)); - __asm__ volatile("pushf; popl %0" : "=r" (flags)); + assert(get_interrupt_level() ? 
(enable == FALSE) : TRUE); - if (enable) { - ast_t *myast; + istate = ((flags & EFL_IF) != 0); - myast = ast_pending(); + if (enable) { + __asm__ volatile ("sti;nop"); - if ( (get_preemption_level() == 0) && (*myast & AST_URGENT) ) { - __asm__ volatile("sti"); - __asm__ volatile ("int $0xff"); - } else { - __asm__ volatile ("sti"); + if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) { + __asm__ volatile ("int %0" :: "N" (T_PREEMPT)); + } + } else { + if (istate) { + __asm__ volatile ("cli"); + } + } + + return istate; +} + +/* Early Set Interrupts Enabled */ +boolean_t +ml_early_set_interrupts_enabled(boolean_t enable) +{ + if (enable == TRUE) { + kprintf("Caller attempted to enable interrupts too early in " + "kernel startup. Halting.\n"); + hlt(); + /*NOTREACHED*/ } - } - else { - __asm__ volatile("cli"); - } - return (flags & EFL_IF) != 0; + /* On x86, do not allow interrupts to be enabled very early */ + return FALSE; } /* Check if running at interrupt context */ -boolean_t ml_at_interrupt_context(void) +boolean_t +ml_at_interrupt_context(void) { return get_interrupt_level() != 0; } +void +ml_get_power_state(boolean_t *icp, boolean_t *pidlep) +{ + *icp = (get_interrupt_level() != 0); + /* These will be technically inaccurate for interrupts that occur + * successively within a single "idle exit" event, but shouldn't + * matter statistically. + */ + *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage); +} + /* Generate a fake interrupt */ -void ml_cause_interrupt(void) +__dead2 +void +ml_cause_interrupt(void) { panic("ml_cause_interrupt not defined yet on Intel"); } -void ml_thread_policy( +/* + * TODO: transition users of this to kernel_thread_start_priority + * ml_thread_policy is an unsupported KPI + */ +void +ml_thread_policy( thread_t thread, -__unused unsigned policy_id, + __unused unsigned policy_id, unsigned policy_info) { if (policy_info & MACHINE_NETWORK_WORKLOOP) { - spl_t s = splsched(); + thread_precedence_policy_data_t info; + __assert_only kern_return_t kret; - thread_lock(thread); + info.importance = 1; - set_priority(thread, thread->priority + 1); - - thread_unlock(thread); - splx(s); + kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, + (thread_policy_t)&info, + THREAD_PRECEDENCE_POLICY_COUNT); + assert(kret == KERN_SUCCESS); } } /* Initialize Interrupts */ -void ml_install_interrupt_handler( +void +ml_install_interrupt_handler( void *nub, int source, void *target, IOInterruptHandler handler, - void *refCon) + void *refCon) { boolean_t current_state; - current_state = ml_get_interrupts_enabled(); + current_state = ml_set_interrupts_enabled(FALSE); PE_install_interrupt_handler(nub, source, target, - (IOInterruptHandler) handler, refCon); + (IOInterruptHandler) handler, refCon); (void) ml_set_interrupts_enabled(current_state); - - initialize_screen(NULL, kPEAcquireScreen); } void -machine_idle(void) +machine_signal_idle( + processor_t processor) { - x86_core_t *my_core = x86_core(); - cpu_data_t *my_cpu = current_cpu_datap(); - int others_active; - - /* - * We halt this cpu thread - * unless kernel param idlehalt is false and no other thread - * in the same core is active - if so, don't halt so that this - * core doesn't go into a low-power mode. - * For 4/4, we set a null "active cr3" while idle. 
- */ - if (my_core == NULL || my_cpu == NULL) - goto out; - - others_active = !atomic_decl_and_test( - (long *) &my_core->active_lcpus, 1); - my_cpu->lcpu.idle = TRUE; - if (idlehalt || others_active) { - DBGLOG(cpu_handle, cpu_number(), MP_IDLE); - MARK_CPU_IDLE(cpu_number()); - machine_idle_cstate(FALSE); - MARK_CPU_ACTIVE(cpu_number()); - DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE); - } - my_cpu->lcpu.idle = FALSE; - atomic_incl((long *) &my_core->active_lcpus, 1); - out: - __asm__ volatile("sti"); + cpu_interrupt(processor->cpu_id); } +__dead2 void -machine_signal_idle( - processor_t processor) +machine_signal_idle_deferred( + __unused processor_t processor) { - cpu_interrupt(PROCESSOR_DATA(processor, slot_num)); + panic("Unimplemented"); } -thread_t -machine_processor_shutdown( - thread_t thread, - void (*doshutdown)(processor_t), - processor_t processor) +__dead2 +void +machine_signal_idle_cancel( + __unused processor_t processor) { - vmx_suspend(); - fpu_save_context(thread); - return(Shutdown_context(thread, doshutdown, processor)); + panic("Unimplemented"); } -kern_return_t -ml_processor_register( - cpu_id_t cpu_id, - uint32_t lapic_id, - processor_t *processor_out, - ipi_handler_t *ipi_handler, - boolean_t boot_cpu) +static kern_return_t +register_cpu( + uint32_t lapic_id, + processor_t *processor_out, + boolean_t boot_cpu ) { - int target_cpu; - cpu_data_t *this_cpu_datap; + int target_cpu; + cpu_data_t *this_cpu_datap; this_cpu_datap = cpu_data_alloc(boot_cpu); if (this_cpu_datap == NULL) { @@ -360,155 +525,301 @@ ml_processor_register( } target_cpu = this_cpu_datap->cpu_number; assert((boot_cpu && (target_cpu == 0)) || - (!boot_cpu && (target_cpu != 0))); + (!boot_cpu && (target_cpu != 0))); lapic_cpu_map(lapic_id, target_cpu); - this_cpu_datap->cpu_id = cpu_id; + /* The cpu_id is not known at registration phase. Just do + * lapic_id for now + */ this_cpu_datap->cpu_phys_number = lapic_id; this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu); - if (this_cpu_datap->cpu_console_buf == NULL) + if (this_cpu_datap->cpu_console_buf == NULL) { goto failed; + } - this_cpu_datap->cpu_chud = chudxnu_cpu_alloc(boot_cpu); - if (this_cpu_datap->cpu_chud == NULL) +#if KPC + if (kpc_register_cpu(this_cpu_datap) != TRUE) { goto failed; + } +#endif if (!boot_cpu) { - this_cpu_datap->lcpu.core = cpu_thread_alloc(this_cpu_datap->cpu_number); - if (this_cpu_datap->lcpu.core == NULL) - goto failed; - - pmCPUStateInit(); - - this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu); - if (this_cpu_datap->cpu_pmap == NULL) + cpu_thread_alloc(this_cpu_datap->cpu_number); + if (this_cpu_datap->lcpu.core == NULL) { goto failed; - - this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu); - if (this_cpu_datap->cpu_processor == NULL) - goto failed; - /* - * processor_init() deferred to topology start - * because "slot numbers" a.k.a. logical processor numbers - * are not yet finalized. - */ + } } + /* + * processor_init() deferred to topology start + * because "slot numbers" a.k.a. logical processor numbers + * are not yet finalized. + */ *processor_out = this_cpu_datap->cpu_processor; - *ipi_handler = NULL; - - if (target_cpu == machine_info.max_cpus - 1) { - /* - * All processors are now registered but not started (except - * for this "in-limbo" boot processor). We call to the machine - * topology code to finalize and activate the topology. 
- */ - cpu_topology_start(); - } return KERN_SUCCESS; failed: - cpu_processor_free(this_cpu_datap->cpu_processor); - pmap_cpu_free(this_cpu_datap->cpu_pmap); - chudxnu_cpu_free(this_cpu_datap->cpu_chud); console_cpu_free(this_cpu_datap->cpu_console_buf); +#if KPC + kpc_unregister_cpu(this_cpu_datap); +#endif /* KPC */ + return KERN_FAILURE; } + +kern_return_t +ml_processor_register( + cpu_id_t cpu_id, + uint32_t lapic_id, + processor_t *processor_out, + boolean_t boot_cpu, + boolean_t start ) +{ + static boolean_t done_topo_sort = FALSE; + static uint32_t num_registered = 0; + + /* Register all CPUs first, and track max */ + if (start == FALSE) { + num_registered++; + + DBG( "registering CPU lapic id %d\n", lapic_id ); + + return register_cpu( lapic_id, processor_out, boot_cpu ); + } + + /* Sort by topology before we start anything */ + if (!done_topo_sort) { + DBG( "about to start CPUs. %d registered\n", num_registered ); + + cpu_topology_sort( num_registered ); + done_topo_sort = TRUE; + } + + /* Assign the cpu ID */ + uint32_t cpunum = -1; + cpu_data_t *this_cpu_datap = NULL; + + /* find cpu num and pointer */ + cpunum = ml_get_cpuid( lapic_id ); + + if (cpunum == 0xFFFFFFFF) { /* never heard of it? */ + panic( "trying to start invalid/unregistered CPU %d\n", lapic_id ); + } + + this_cpu_datap = cpu_datap(cpunum); + + /* fix the CPU id */ + this_cpu_datap->cpu_id = cpu_id; + + /* allocate and initialize other per-cpu structures */ + if (!boot_cpu) { + mp_cpus_call_cpu_init(cpunum); + random_cpu_init(cpunum); + } + + /* output arg */ + *processor_out = this_cpu_datap->cpu_processor; + + /* OK, try and start this CPU */ + return cpu_topology_start_cpu( cpunum ); +} + + void ml_cpu_get_info(ml_cpu_info_t *cpu_infop) { - boolean_t os_supports_sse; + boolean_t os_supports_sse; i386_cpu_info_t *cpuid_infop; - if (cpu_infop == NULL) + if (cpu_infop == NULL) { return; - + } + /* * Are we supporting MMX/SSE/SSE2/SSE3? * As distinct from whether the cpu has these capabilities. 
*/ - os_supports_sse = get_cr4() & CR4_XMM; - if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) + os_supports_sse = !!(get_cr4() & CR4_OSXMM); + + if (ml_fpu_avx_enabled()) { + cpu_infop->vector_unit = 9; + } else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) { cpu_infop->vector_unit = 8; - else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) + } else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) { cpu_infop->vector_unit = 7; - else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) + } else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) { cpu_infop->vector_unit = 6; - else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) + } else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) { cpu_infop->vector_unit = 5; - else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) + } else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) { cpu_infop->vector_unit = 4; - else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) + } else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) { cpu_infop->vector_unit = 3; - else if (cpuid_features() & CPUID_FEATURE_MMX) + } else if (cpuid_features() & CPUID_FEATURE_MMX) { cpu_infop->vector_unit = 2; - else + } else { cpu_infop->vector_unit = 0; + } cpuid_infop = cpuid_info(); - cpu_infop->cache_line_size = cpuid_infop->cache_linesize; + cpu_infop->cache_line_size = cpuid_infop->cache_linesize; cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I]; cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D]; - - if (cpuid_infop->cache_size[L2U] > 0) { - cpu_infop->l2_settings = 1; - cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U]; - } else { - cpu_infop->l2_settings = 0; - cpu_infop->l2_cache_size = 0xFFFFFFFF; - } - if (cpuid_infop->cache_size[L3U] > 0) { - cpu_infop->l3_settings = 1; - cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U]; - } else { - cpu_infop->l3_settings = 0; - cpu_infop->l3_cache_size = 0xFFFFFFFF; - } + if (cpuid_infop->cache_size[L2U] > 0) { + cpu_infop->l2_settings = 1; + cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U]; + } else { + cpu_infop->l2_settings = 0; + cpu_infop->l2_cache_size = 0xFFFFFFFF; + } + + if (cpuid_infop->cache_size[L3U] > 0) { + cpu_infop->l3_settings = 1; + cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U]; + } else { + cpu_infop->l3_settings = 0; + cpu_infop->l3_cache_size = 0xFFFFFFFF; + } } -void -ml_init_max_cpus(unsigned long max_cpus) +int +ml_early_cpu_max_number(void) { - boolean_t current_state; + int n = max_ncpus; + + assert(startup_phase >= STARTUP_SUB_TUNABLES); + if (max_cpus_from_firmware) { + n = MIN(n, max_cpus_from_firmware); + } + return n - 1; +} - current_state = ml_set_interrupts_enabled(FALSE); - if (max_cpus_initialized != MAX_CPUS_SET) { - if (max_cpus > 0 && max_cpus <= MAX_CPUS) { +void +ml_set_max_cpus(unsigned int max_cpus) +{ + lck_mtx_lock(&max_cpus_lock); + if (max_cpus_initialized != MAX_CPUS_SET) { + if (max_cpus > 0 && max_cpus <= MAX_CPUS) { /* * Note: max_cpus is the number of enabled processors * that ACPI found; max_ncpus is the maximum number * that the kernel supports or that the "cpus=" * boot-arg has set. Here we take int minimum. 
*/ - machine_info.max_cpus = MIN(max_cpus, max_ncpus); + machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus); } - if (max_cpus_initialized == MAX_CPUS_WAIT) - wakeup((event_t)&max_cpus_initialized); - max_cpus_initialized = MAX_CPUS_SET; - } - (void) ml_set_interrupts_enabled(current_state); + if (max_cpus_initialized == MAX_CPUS_WAIT) { + thread_wakeup((event_t) &max_cpus_initialized); + } + max_cpus_initialized = MAX_CPUS_SET; + } + lck_mtx_unlock(&max_cpus_lock); } -int -ml_get_max_cpus(void) +unsigned int +ml_wait_max_cpus(void) { - boolean_t current_state; + lck_mtx_lock(&max_cpus_lock); + while (max_cpus_initialized != MAX_CPUS_SET) { + max_cpus_initialized = MAX_CPUS_WAIT; + lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT); + } + lck_mtx_unlock(&max_cpus_lock); + return machine_info.max_cpus; +} - current_state = ml_set_interrupts_enabled(FALSE); - if (max_cpus_initialized != MAX_CPUS_SET) { - max_cpus_initialized = MAX_CPUS_WAIT; - assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT); - (void)thread_block(THREAD_CONTINUE_NULL); - } - (void) ml_set_interrupts_enabled(current_state); - return(machine_info.max_cpus); +void +ml_panic_trap_to_debugger(__unused const char *panic_format_str, + __unused va_list *panic_args, + __unused unsigned int reason, + __unused void *ctx, + __unused uint64_t panic_options_mask, + __unused unsigned long panic_caller) +{ + return; +} + +static uint64_t +virtual_timeout_inflate64(unsigned int vti, uint64_t timeout, uint64_t max_timeout) +{ + if (vti >= 64) { + return max_timeout; + } + + if ((timeout << vti) >> vti != timeout) { + return max_timeout; + } + + if ((timeout << vti) > max_timeout) { + return max_timeout; + } + + return timeout << vti; +} + +static uint32_t +virtual_timeout_inflate32(unsigned int vti, uint32_t timeout, uint32_t max_timeout) +{ + if (vti >= 32) { + return max_timeout; + } + + if ((timeout << vti) >> vti != timeout) { + return max_timeout; + } + + return timeout << vti; +} + +/* + * Some timeouts are later adjusted or used in calculations setting + * other values. In order to avoid overflow, cap the max timeout as + * 2^47ns (~39 hours). + */ +static const uint64_t max_timeout_ns = 1ULL << 47; + +/* + * Inflate a timeout in absolutetime. + */ +static uint64_t +virtual_timeout_inflate_abs(unsigned int vti, uint64_t timeout) +{ + uint64_t max_timeout; + nanoseconds_to_absolutetime(max_timeout_ns, &max_timeout); + return virtual_timeout_inflate64(vti, timeout, max_timeout); +} + +/* + * Inflate a value in TSC ticks. + */ +static uint64_t +virtual_timeout_inflate_tsc(unsigned int vti, uint64_t timeout) +{ + const uint64_t max_timeout = tmrCvt(max_timeout_ns, tscFCvtn2t); + return virtual_timeout_inflate64(vti, timeout, max_timeout); +} + +/* + * Inflate a timeout in microseconds. 
+ */ +static uint32_t +virtual_timeout_inflate_us(unsigned int vti, uint64_t timeout) +{ + const uint32_t max_timeout = ~0; + return virtual_timeout_inflate32(vti, timeout, max_timeout); +} + +uint64_t +ml_get_timebase_entropy(void) +{ + return __builtin_ia32_rdtsc(); } /* @@ -518,26 +829,160 @@ ml_get_max_cpus(void) void ml_init_lock_timeout(void) { - uint64_t abstime; - uint32_t mtxspin; + uint64_t abstime; + uint32_t mtxspin; +#if DEVELOPMENT || DEBUG + uint64_t default_timeout_ns = NSEC_PER_SEC >> 2; +#else + uint64_t default_timeout_ns = NSEC_PER_SEC >> 1; +#endif + uint32_t slto; + uint32_t prt; - /* LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks */ - nanoseconds_to_absolutetime(NSEC_PER_SEC>>2, &abstime); - LockTimeOut = (uint32_t) abstime; - LockTimeOutTSC = (uint32_t) tmrCvt(abstime, tscFCvtn2t); + if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) { + default_timeout_ns = slto * NSEC_PER_USEC; + } + + /* + * LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks, + * and LockTimeOutUsec is in microseconds and it's 32-bits. + */ + LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC); + nanoseconds_to_absolutetime(default_timeout_ns, &abstime); + LockTimeOut = abstime; + LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t); - if (PE_parse_boot_arg("mtxspin", &mtxspin)) { - if (mtxspin > USEC_PER_SEC>>4) - mtxspin = USEC_PER_SEC>>4; - nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime); + /* + * TLBTimeOut dictates the TLB flush timeout period. It defaults to + * LockTimeOut but can be overriden separately. In particular, a + * zero value inhibits the timeout-panic and cuts a trace evnt instead + * - see pmap_flush_tlbs(). + */ + if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) { + default_timeout_ns = slto * NSEC_PER_USEC; + nanoseconds_to_absolutetime(default_timeout_ns, &abstime); + TLBTimeOut = (uint32_t) abstime; } else { - nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime); + TLBTimeOut = LockTimeOut; + } + +#if DEVELOPMENT || DEBUG + reportphyreaddelayabs = LockTimeOut >> 1; +#endif + if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) { + default_timeout_ns = slto * NSEC_PER_USEC; + nanoseconds_to_absolutetime(default_timeout_ns, &abstime); + reportphyreaddelayabs = abstime; + } + + if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) { + nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime); + reportphywritedelayabs = abstime; + } + + if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) { + nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime); + tracephyreaddelayabs = abstime; + } + + if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) { + nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime); + tracephywritedelayabs = abstime; + } + + if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) { + if (mtxspin > USEC_PER_SEC >> 4) { + mtxspin = USEC_PER_SEC >> 4; + } + nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime); + } else { + nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime); } MutexSpin = (unsigned int)abstime; + low_MutexSpin = MutexSpin; + /* + * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but + * real_ncpus is not set at this time + */ + high_MutexSpin = -1; + + nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance); + if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) { + nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, 
&panic_restart_timeout); + } + + virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0); + if (virtualized) { + unsigned int vti; + + if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) { + vti = 6; + } + printf("Timeouts adjusted for virtualization (<<%d)\n", vti); + kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti); +#define VIRTUAL_TIMEOUT_INFLATE_ABS(_timeout) \ +MACRO_BEGIN \ + kprintf("%24s: 0x%016llx ", #_timeout, _timeout); \ + _timeout = virtual_timeout_inflate_abs(vti, _timeout); \ + kprintf("-> 0x%016llx\n", _timeout); \ +MACRO_END + +#define VIRTUAL_TIMEOUT_INFLATE_TSC(_timeout) \ +MACRO_BEGIN \ + kprintf("%24s: 0x%016llx ", #_timeout, _timeout); \ + _timeout = virtual_timeout_inflate_tsc(vti, _timeout); \ + kprintf("-> 0x%016llx\n", _timeout); \ +MACRO_END +#define VIRTUAL_TIMEOUT_INFLATE_US(_timeout) \ +MACRO_BEGIN \ + kprintf("%24s: 0x%08x ", #_timeout, _timeout); \ + _timeout = virtual_timeout_inflate_us(vti, _timeout); \ + kprintf("-> 0x%08x\n", _timeout); \ +MACRO_END + VIRTUAL_TIMEOUT_INFLATE_US(LockTimeOutUsec); + VIRTUAL_TIMEOUT_INFLATE_ABS(LockTimeOut); + VIRTUAL_TIMEOUT_INFLATE_TSC(LockTimeOutTSC); + VIRTUAL_TIMEOUT_INFLATE_ABS(TLBTimeOut); + VIRTUAL_TIMEOUT_INFLATE_ABS(MutexSpin); + VIRTUAL_TIMEOUT_INFLATE_ABS(low_MutexSpin); + VIRTUAL_TIMEOUT_INFLATE_ABS(reportphyreaddelayabs); + } + + interrupt_latency_tracker_setup(); + simple_lock_init(&ml_timer_evaluation_slock, 0); } /* - * This is called from the machine-independent routine cpu_up() + * Threshold above which we should attempt to block + * instead of spinning for clock_delay_until(). + */ + +void +ml_init_delay_spin_threshold(int threshold_us) +{ + nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold); +} + +boolean_t +ml_delay_should_spin(uint64_t interval) +{ + return (interval < delay_spin_threshold) ? TRUE : FALSE; +} + +TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0); + +void +ml_delay_on_yield(void) +{ +#if DEVELOPMENT || DEBUG + if (yield_delay_us) { + delay(yield_delay_us); + } +#endif +} + +/* + * This is called from the machine-independent layer * to perform machine-dependent info updates. Defer to cpu_thread_init(). */ void @@ -547,12 +992,14 @@ ml_cpu_up(void) } /* - * This is called from the machine-independent routine cpu_down() + * This is called from the machine-independent layer * to perform machine-dependent info updates. 
*/ void ml_cpu_down(void) { + i386_deactivate_cpu(); + return; } @@ -560,108 +1007,262 @@ ml_cpu_down(void) * The following are required for parts of the kernel * that cannot resolve these functions as inlines: */ -extern thread_t current_act(void); +extern thread_t current_act(void) __attribute__((const)); thread_t current_act(void) { - return(current_thread_fast()); + return current_thread_fast(); } #undef current_thread -extern thread_t current_thread(void); +extern thread_t current_thread(void) __attribute__((const)); thread_t current_thread(void) { - return(current_thread_fast()); + return current_thread_fast(); } -boolean_t ml_is64bit(void) { - - return (cpu_mode_is64bit()); +boolean_t +ml_is64bit(void) +{ + return cpu_mode_is64bit(); } -boolean_t ml_thread_is64bit(thread_t thread) { - - return (thread_is_64bit(thread)); +boolean_t +ml_thread_is64bit(thread_t thread) +{ + return thread_is_64bit_addr(thread); } -boolean_t ml_state_is64bit(void *saved_state) { - +boolean_t +ml_state_is64bit(void *saved_state) +{ return is_saved_state64(saved_state); } -void ml_cpu_set_ldt(int selector) +void +ml_cpu_set_ldt(int selector) { /* * Avoid loading the LDT * if we're setting the KERNEL LDT and it's already set. */ if (selector == KERNEL_LDT && - current_cpu_datap()->cpu_ldt == KERNEL_LDT) + current_cpu_datap()->cpu_ldt == KERNEL_LDT) { return; + } - /* - * If 64bit this requires a mode switch (and back). - */ - if (cpu_mode_is64bit()) - ml_64bit_lldt(selector); - else - lldt(selector); - current_cpu_datap()->cpu_ldt = selector; + lldt(selector); + current_cpu_datap()->cpu_ldt = selector; } -void ml_fp_setvalid(boolean_t value) +void +ml_fp_setvalid(boolean_t value) { - fp_setvalid(value); + fp_setvalid(value); } -uint64_t ml_cpu_int_event_time(void) +uint64_t +ml_cpu_int_event_time(void) { return current_cpu_datap()->cpu_int_event_time; } - -#if MACH_KDB - -/* - * Display the global msrs - * * - * ms - */ -void -db_msr(__unused db_expr_t addr, - __unused int have_addr, - __unused db_expr_t count, - __unused char *modif) +vm_offset_t +ml_stack_remaining(void) { + uintptr_t local = (uintptr_t) &local; - uint32_t i, msrlow, msrhigh; + if (ml_at_interrupt_context() != 0) { + return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE); + } else { + return local - current_thread()->kernel_stack; + } +} - /* Try all of the first 4096 msrs */ - for (i = 0; i < 4096; i++) { - if (!rdmsr_carefully(i, &msrlow, &msrhigh)) { - db_printf("%08X - %08X.%08X\n", i, msrhigh, msrlow); - } +#if KASAN +vm_offset_t ml_stack_base(void); +vm_size_t ml_stack_size(void); + +vm_offset_t +ml_stack_base(void) +{ + if (ml_at_interrupt_context()) { + return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE; + } else { + return current_thread()->kernel_stack; } +} - /* Try all of the 4096 msrs at 0x0C000000 */ - for (i = 0; i < 4096; i++) { - if (!rdmsr_carefully(0x0C000000 | i, &msrlow, &msrhigh)) { - db_printf("%08X - %08X.%08X\n", - 0x0C000000 | i, msrhigh, msrlow); - } +vm_size_t +ml_stack_size(void) +{ + if (ml_at_interrupt_context()) { + return INTSTACK_SIZE; + } else { + return kernel_stack_size; } +} +#endif + +void +kernel_preempt_check(void) +{ + boolean_t intr; + unsigned long flags; + + assert(get_preemption_level() == 0); + + if (__improbable(*ast_pending() & AST_URGENT)) { + /* + * can handle interrupts and preemptions + * at this point + */ + __asm__ volatile ("pushf; pop %0" : "=r" (flags)); + + intr = ((flags & EFL_IF) != 0); - /* Try all of the 4096 msrs at 0xC0000000 */ - for (i = 0; i < 
4096; i++) { - if (!rdmsr_carefully(0xC0000000 | i, &msrlow, &msrhigh)) { - db_printf("%08X - %08X.%08X\n", - 0xC0000000 | i, msrhigh, msrlow); + /* + * now cause the PRE-EMPTION trap + */ + if (intr == TRUE) { + __asm__ volatile ("int %0" :: "N" (T_PREEMPT)); } } } -#endif +boolean_t +machine_timeout_suspended(void) +{ + return pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake(); +} + +/* Eagerly evaluate all pending timer and thread callouts + */ +void +ml_timer_evaluate(void) +{ + KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0); + + uint64_t te_end, te_start = mach_absolute_time(); + simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL); + ml_timer_evaluation_in_progress = TRUE; + thread_call_delayed_timer_rescan_all(); + mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL); + ml_timer_evaluation_in_progress = FALSE; + ml_timer_eager_evaluations++; + te_end = mach_absolute_time(); + ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start)); + simple_unlock(&ml_timer_evaluation_slock); + + KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0); +} + +boolean_t +ml_timer_forced_evaluation(void) +{ + return ml_timer_evaluation_in_progress; +} + +uint64_t +ml_energy_stat(__unused thread_t t) +{ + return 0; +} + +void +ml_gpu_stat_update(uint64_t gpu_ns_delta) +{ + current_thread()->machine.thread_gpu_ns += gpu_ns_delta; +} + +uint64_t +ml_gpu_stat(thread_t t) +{ + return t->machine.thread_gpu_ns; +} + +int plctrace_enabled = 0; + +void +_disable_preemption(void) +{ + disable_preemption_internal(); +} + +void +_enable_preemption(void) +{ + enable_preemption_internal(); +} + +void +plctrace_disable(void) +{ + plctrace_enabled = 0; +} + +static boolean_t ml_quiescing; + +void +ml_set_is_quiescing(boolean_t quiescing) +{ + ml_quiescing = quiescing; +} + +boolean_t +ml_is_quiescing(void) +{ + return ml_quiescing; +} + +uint64_t +ml_get_booter_memory_size(void) +{ + return 0; +} + +void +machine_lockdown(void) +{ + x86_64_protect_data_const(); +} + +bool +ml_cpu_can_exit(__unused int cpu_id) +{ + return true; +} + +void +ml_cpu_begin_state_transition(__unused int cpu_id) +{ +} + +void +ml_cpu_end_state_transition(__unused int cpu_id) +{ +} + +void +ml_cpu_begin_loop(void) +{ +} + +void +ml_cpu_end_loop(void) +{ +} + +size_t +ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions) +{ +#pragma unused(vm_is64bit) + assert(regions != NULL); + + *regions = NULL; + return 0; +}
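
Note: ml_nofault_copy() and ml_validate_nofault() in this diff walk a buffer in chunks that never cross a page boundary in either the source or the destination; each iteration takes the minimum of the space left in the current source page, the space left in the current destination page, and the bytes still to copy. The following is a minimal user-space sketch of just that chunk-size computation; copy_chunk_size, the stand-in PAGE_SIZE value, and the example offsets are illustrative, not the kernel's.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u                 /* stand-in for the kernel constant */
#define PAGE_MASK (PAGE_SIZE - 1)

/* Largest chunk that stays within the current page of both src and dst
 * and does not exceed the bytes still to copy - the same min-of-three
 * calculation ml_nofault_copy() performs on each iteration. */
static uint32_t
copy_chunk_size(uint64_t phys_src, uint64_t phys_dst, uint64_t remaining)
{
    uint32_t count = (uint32_t)(PAGE_SIZE - (phys_src & PAGE_MASK));

    if (count > PAGE_SIZE - (phys_dst & PAGE_MASK)) {
        count = (uint32_t)(PAGE_SIZE - (phys_dst & PAGE_MASK));
    }
    if (count > remaining) {
        count = (uint32_t)remaining;
    }
    return count;
}

int
main(void)
{
    /* src is 100 bytes into its page, dst is 4000 bytes into its page, 8 KB
     * remain: the first chunk is limited by the destination page (96 bytes). */
    printf("%u\n", copy_chunk_size(100, 4000, 8192));
    return 0;
}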
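
Note: several routines in this file (ml_install_interrupt_handler, the old ml_init_max_cpus path) depend on ml_set_interrupts_enabled() returning the interrupt state that was in effect before the call, so a critical section can restore whatever the caller had rather than unconditionally re-enabling. The sketch below only shows that calling convention; it is kernel-context pseudocode (not buildable outside xnu), and do_work_with_interrupts_disabled is a hypothetical caller.

/* Typical save/disable/restore idiom built on ml_set_interrupts_enabled().
 * The return value is the previous state, so the section behaves correctly
 * whether the caller entered with interrupts enabled or disabled. */
static void
do_work_with_interrupts_disabled(void)
{
    boolean_t prev_state;

    prev_state = ml_set_interrupts_enabled(FALSE);  /* disable, remember old state */

    /* ... critical section that must not be interrupted ... */

    (void) ml_set_interrupts_enabled(prev_state);   /* restore the caller's state */
}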
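
Note: the virtual_timeout_inflate64() helper added by this diff scales a timeout by shifting it left by the "vti" boot-arg, saturating at a maximum whenever the shift count is too large, the shift itself would lose bits, or the result would exceed the cap. A small standalone sketch of that clamped-shift idea follows; inflate_timeout and the values in main() are illustrative, not the kernel's symbols.

#include <stdint.h>
#include <stdio.h>

/* Clamped left-shift: inflate 'timeout' by 2^vti, saturating at 'max_timeout'
 * when the shift count alone would overflow, when the round-trip shift shows
 * bits were lost, or when the result would exceed the cap. */
static uint64_t
inflate_timeout(unsigned int vti, uint64_t timeout, uint64_t max_timeout)
{
    if (vti >= 64) {
        return max_timeout;                   /* shift count out of range */
    }
    if ((timeout << vti) >> vti != timeout) {
        return max_timeout;                   /* shift overflowed */
    }
    if ((timeout << vti) > max_timeout) {
        return max_timeout;                   /* result exceeds the cap */
    }
    return timeout << vti;
}

int
main(void)
{
    /* With the default vti of 6, a 250 ms (in ns) timeout inflates to 16 s,
     * well under the 2^47 ns cap used in ml_init_lock_timeout(). */
    printf("%llu\n", (unsigned long long)inflate_timeout(6, 250000000ULL, 1ULL << 47));
    return 0;
}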