X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/b0d623f7f2ae71ed96e60569f61f9a9a27016e80..3e170ce000f1506b7b5d2c5c7faec85ceabb573d:/osfmk/i386/machine_routines.c

diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c
index d0307ca7f..a90d68178 100644
--- a/osfmk/i386/machine_routines.c
+++ b/osfmk/i386/machine_routines.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -36,26 +36,26 @@
 #include <kern/cpu_data.h>
 #include <kern/cpu_number.h>
 #include <kern/thread.h>
+#include <kern/thread_call.h>
+#include <prng/random.h>
 #include <i386/machine_cpu.h>
 #include <i386/lapic.h>
+#include <i386/bit_routines.h>
 #include <i386/mp_events.h>
 #include <i386/pmCPU.h>
+#include <i386/trap.h>
 #include <i386/tsc.h>
 #include <i386/cpu_threads.h>
 #include <i386/proc_reg.h>
 #include <mach/vm_param.h>
 #include <i386/pmap.h>
+#include <i386/pmap_internal.h>
 #include <i386/misc_protos.h>
-#if MACH_KDB
-#include <machine/db_machdep.h>
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
+#include <kern/timer_queue.h>
+#if KPC
+#include <kern/kpc.h>
 #endif
+#include <architecture/i386/pio.h>
 
 #if DEBUG
 #define DBG(x...)	kprintf("DBG: " x)
@@ -63,15 +63,26 @@
 #define DBG(x...)
 #endif
 
-
 extern void 	wakeup(void *);
 
 static int max_cpus_initialized = 0;
 
 unsigned int	LockTimeOut;
+unsigned int	TLBTimeOut;
 unsigned int	LockTimeOutTSC;
 unsigned int	MutexSpin;
 uint64_t	LastDebuggerEntryAllowance;
+uint64_t	delay_spin_threshold;
+
+extern uint64_t panic_restart_timeout;
+
+boolean_t virtualized = FALSE;
+
+decl_simple_lock_data(static,  ml_timer_evaluation_slock);
+uint32_t ml_timer_eager_evaluations;
+uint64_t ml_timer_eager_evaluation_max;
+static boolean_t ml_timer_evaluation_in_progress = FALSE;
+
 
 #define MAX_CPUS_SET    0x1
 #define MAX_CPUS_WAIT   0x2
@@ -96,8 +107,8 @@ vm_offset_t ml_static_malloc(
 
 void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
 {
-        *phys_addr = bounce_pool_base;
-	*size      = bounce_pool_size;
+        *phys_addr = 0;
+	*size      = 0;
 }
 
 
@@ -124,14 +135,13 @@ ml_static_mfree(
 {
 	addr64_t vaddr_cur;
 	ppnum_t ppn;
-
+	uint32_t freed_pages = 0;
 	assert(vaddr >= VM_MIN_KERNEL_ADDRESS);
 
 	assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */
 
-
 	for (vaddr_cur = vaddr;
-	     vaddr_cur < round_page_64(vaddr+size);
+ 	     vaddr_cur < round_page_64(vaddr+size);
 	     vaddr_cur += PAGE_SIZE) {
 		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
 		if (ppn != (vm_offset_t)NULL) {
@@ -142,10 +152,21 @@ ml_static_mfree(
 					kernel_pmap->stats.resident_count;
 			}
 			pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur+PAGE_SIZE);
-			vm_page_create(ppn,(ppn+1));
-			vm_page_wire_count--;
+			assert(pmap_valid_page(ppn));
+			if (IS_MANAGED_PAGE(ppn)) {
+				vm_page_create(ppn,(ppn+1));
+				freed_pages++;
+			}
 		}
 	}
+	vm_page_lockspin_queues();
+	vm_page_wire_count -= freed_pages;
+	vm_page_wire_count_initial -= freed_pages;
+	vm_page_unlock_queues();
+
+#if	DEBUG	
+	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
+#endif
 }
 
 
@@ -196,6 +217,38 @@ vm_size_t ml_nofault_copy(
 	return nbytes;
 }
 
+/*
+ *	Routine:        ml_validate_nofault
+ *	Function: Validate that this address range has valid translations
+ *			in the kernel pmap.  If translations are present, they are
+ *			assumed to be wired; i.e. no attempt is made to guarantee
+ *			that the translations persist after the check.
+ *  Returns: TRUE if the range is mapped and will not cause a fault,
+ *			FALSE otherwise.
+ */
+
+boolean_t ml_validate_nofault(
+	vm_offset_t virtsrc, vm_size_t size)
+{
+	addr64_t cur_phys_src;
+	uint32_t count;
+
+	while (size > 0) {
+		if (!(cur_phys_src = kvtophys(virtsrc)))
+			return FALSE;
+		if (!pmap_valid_page(i386_btop(cur_phys_src)))
+			return FALSE;
+		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
+		if (count > size)
+			count = (uint32_t)size;
+
+		virtsrc += count;
+		size -= count;
+	}
+
+	return TRUE;
+}
+
 /* Interrupt handling */
 
 /* Initialize Interrupts */
@@ -205,7 +258,6 @@ void ml_init_interrupt(void)
 }
 
 
-
 /* Get Interrupts Enabled */
 boolean_t ml_get_interrupts_enabled(void)
 {
@@ -218,27 +270,27 @@ boolean_t ml_get_interrupts_enabled(void)
 /* Set Interrupts Enabled */
 boolean_t ml_set_interrupts_enabled(boolean_t enable)
 {
-  unsigned long flags;
+	unsigned long flags;
+	boolean_t istate;
+	
+	__asm__ volatile("pushf; pop	%0" :  "=r" (flags));
 
-  __asm__ volatile("pushf; pop	%0" :  "=r" (flags));
+	assert(get_interrupt_level() ? (enable == FALSE) : TRUE);
 
-  if (enable) {
-	ast_t		*myast;
+	istate = ((flags & EFL_IF) != 0);
 
-	myast = ast_pending();
+	if (enable) {
+		__asm__ volatile("sti;nop");
 
-	if ( (get_preemption_level() == 0) &&  (*myast & AST_URGENT) ) {
-	__asm__ volatile("sti");
-          __asm__ volatile ("int $0xff");
-        } else {
-	  __asm__ volatile ("sti");
+		if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT))
+			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
+	}
+	else {
+		if (istate)
+			__asm__ volatile("cli");
 	}
-  }
-  else {
-	__asm__ volatile("cli");
-  }
 
-  return (flags & EFL_IF) != 0;
+	return istate;
 }
 
 /* Check if running at interrupt context */
@@ -247,26 +299,40 @@ boolean_t ml_at_interrupt_context(void)
 	return get_interrupt_level() != 0;
 }
 
+void ml_get_power_state(boolean_t *icp, boolean_t *pidlep) {
+	*icp = (get_interrupt_level() != 0);
+	/* These will be technically inaccurate for interrupts that occur
+	 * successively within a single "idle exit" event, but shouldn't
+	 * matter statistically.
+	 */
+	*pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
+}
+
 /* Generate a fake interrupt */
 void ml_cause_interrupt(void)
 {
 	panic("ml_cause_interrupt not defined yet on Intel");
 }
 
+/*
+ * TODO: transition users of this to kernel_thread_start_priority
+ * ml_thread_policy is an unsupported KPI
+ */
 void ml_thread_policy(
 	thread_t thread,
 __unused	unsigned policy_id,
 	unsigned policy_info)
 {
 	if (policy_info & MACHINE_NETWORK_WORKLOOP) {
-		spl_t		s = splsched();
+		thread_precedence_policy_data_t info;
+		__assert_only kern_return_t kret;
 
-		thread_lock(thread);
+		info.importance = 1;
 
-		set_priority(thread, thread->priority + 1);
-
-		thread_unlock(thread);
-		splx(s);
+		kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
+		                                                (thread_policy_t)&info,
+		                                                THREAD_PRECEDENCE_POLICY_COUNT);
+		assert(kret == KERN_SUCCESS);
 	}
 }
 
@@ -298,6 +364,20 @@ machine_signal_idle(
 	cpu_interrupt(processor->cpu_id);
 }
 
+void
+machine_signal_idle_deferred(
+	__unused processor_t processor)
+{
+	panic("Unimplemented");
+}
+
+void
+machine_signal_idle_cancel(
+	__unused processor_t processor)
+{
+	panic("Unimplemented");
+}
+
 static kern_return_t
 register_cpu(
         uint32_t        lapic_id,
@@ -330,13 +410,16 @@ register_cpu(
 	if (this_cpu_datap->cpu_chud == NULL)
 		goto failed;
 
+#if KPC
+	if (kpc_register_cpu(this_cpu_datap) != TRUE)
+		goto failed;
+#endif
+
 	if (!boot_cpu) {
 		cpu_thread_alloc(this_cpu_datap->cpu_number);
 		if (this_cpu_datap->lcpu.core == NULL)
 			goto failed;
 
-		pmCPUStateInit();
-
 #if NCOPY_WINDOWS > 0
 		this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu);
 		if (this_cpu_datap->cpu_pmap == NULL)
@@ -364,6 +447,13 @@ failed:
 #endif
 	chudxnu_cpu_free(this_cpu_datap->cpu_chud);
 	console_cpu_free(this_cpu_datap->cpu_console_buf);
+#if KPC
+	kpc_counterbuf_free(this_cpu_datap->cpu_kpc_buf[0]);
+	kpc_counterbuf_free(this_cpu_datap->cpu_kpc_buf[1]);
+	kpc_counterbuf_free(this_cpu_datap->cpu_kpc_shadow);
+	kpc_counterbuf_free(this_cpu_datap->cpu_kpc_reload);
+#endif
+
 	return KERN_FAILURE;
 }
 
@@ -413,6 +503,12 @@ ml_processor_register(
     /* fix the CPU id */
     this_cpu_datap->cpu_id = cpu_id;
 
+    /* allocate and initialize other per-cpu structures */
+    if (!boot_cpu) {
+	mp_cpus_call_cpu_init(cpunum);
+	prng_cpu_init(cpunum);
+    }
+
     /* output arg */
     *processor_out = this_cpu_datap->cpu_processor;
 
@@ -434,8 +530,11 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
 	 * Are we supporting MMX/SSE/SSE2/SSE3?
 	 * As distinct from whether the cpu has these capabilities.
 	 */
-	os_supports_sse = !!(get_cr4() & CR4_XMM);
-	if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
+	os_supports_sse = !!(get_cr4() & CR4_OSXMM);
+
+	if (ml_fpu_avx_enabled())
+		cpu_infop->vector_unit = 9;
+	else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
 		cpu_infop->vector_unit = 8;
 	else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse)
 		cpu_infop->vector_unit = 7;
@@ -523,9 +622,14 @@ ml_init_lock_timeout(void)
 {
 	uint64_t	abstime;
 	uint32_t	mtxspin;
+#if DEVELOPMENT || DEBUG
 	uint64_t	default_timeout_ns = NSEC_PER_SEC>>2;
+#else
+	uint64_t	default_timeout_ns = NSEC_PER_SEC>>1;
+#endif
 	uint32_t	slto;
-	
+	uint32_t	prt;
+
 	if (PE_parse_boot_argn("slto_us", &slto, sizeof (slto)))
 		default_timeout_ns = slto * NSEC_PER_USEC;
 
@@ -534,6 +638,26 @@ ml_init_lock_timeout(void)
 	LockTimeOut = (uint32_t) abstime;
 	LockTimeOutTSC = (uint32_t) tmrCvt(abstime, tscFCvtn2t);
 
+	/*
+	 * TLBTimeOut dictates the TLB flush timeout period. It defaults to
+	 * LockTimeOut but can be overridden separately. In particular, a
+	 * zero value inhibits the timeout-panic and cuts a trace event instead
+	 * - see pmap_flush_tlbs().
+	 */
+	if (PE_parse_boot_argn("tlbto_us", &slto, sizeof (slto))) {
+		default_timeout_ns = slto * NSEC_PER_USEC;
+		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
+		TLBTimeOut = (uint32_t) abstime;
+	} else {
+		TLBTimeOut = LockTimeOut;
+	}
+
+	if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof (slto))) {
+		default_timeout_ns = slto * NSEC_PER_USEC;
+		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
+		reportphyreaddelayabs = abstime;
+	}
+
 	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
 		if (mtxspin > USEC_PER_SEC>>4)
 			mtxspin =  USEC_PER_SEC>>4;
@@ -543,11 +667,33 @@ ml_init_lock_timeout(void)
 	}
 	MutexSpin = (unsigned int)abstime;
 
-	nanoseconds_to_absolutetime(2 * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
+	nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
+	if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof (prt)))
+		nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
+	virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
+	interrupt_latency_tracker_setup();
+	simple_lock_init(&ml_timer_evaluation_slock, 0);
 }
 
 /*
- * This is called from the machine-independent routine cpu_up()
+ * Threshold above which we should attempt to block
+ * instead of spinning for clock_delay_until().
+ */
+
+void
+ml_init_delay_spin_threshold(int threshold_us)
+{
+	nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
+}
+
+boolean_t
+ml_delay_should_spin(uint64_t interval)
+{
+	return (interval < delay_spin_threshold) ? TRUE : FALSE;
+}
+
+/*
+ * This is called from the machine-independent layer
  * to perform machine-dependent info updates. Defer to cpu_thread_init().
  */
 void
@@ -557,12 +703,14 @@ ml_cpu_up(void)
 }
 
 /*
- * This is called from the machine-independent routine cpu_down()
+ * This is called from the machine-independent layer
  * to perform machine-dependent info updates.
  */
 void
 ml_cpu_down(void)
 {
+	i386_deactivate_cpu();
+
 	return;
 }
 
@@ -613,17 +761,7 @@ void ml_cpu_set_ldt(int selector)
 	    current_cpu_datap()->cpu_ldt == KERNEL_LDT)
 		return;
 
-#if defined(__i386__)
-	/*
- 	 * If 64bit this requires a mode switch (and back). 
-	 */
-	if (cpu_mode_is64bit())
-		ml_64bit_lldt(selector);
-	else
-		lldt(selector);
-#else
 	lldt(selector);
-#endif
 	current_cpu_datap()->cpu_ldt = selector;
 }
 
@@ -648,44 +786,90 @@ vm_offset_t ml_stack_remaining(void)
 	}
 }
 
-#if MACH_KDB
+void
+kernel_preempt_check(void)
+{
+	boolean_t	intr;
+	unsigned long flags;
 
-/*
- *	Display the global msrs
- * *		
- *	ms
+	assert(get_preemption_level() == 0);
+
+	__asm__ volatile("pushf; pop	%0" :  "=r" (flags));
+
+	intr = ((flags & EFL_IF) != 0);
+
+	if ((*ast_pending() & AST_URGENT) && intr == TRUE) {
+		/*
+		 * can handle interrupts and preemptions 
+		 * at this point
+		 */
+
+		/*
+		 * now cause the PRE-EMPTION trap
+		 */
+		__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
+	}
+}
+
+boolean_t machine_timeout_suspended(void) {
+	return (virtualized || pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake());
+}
+
+/* Eagerly evaluate all pending timer and thread callouts
  */
-void 
-db_msr(__unused db_expr_t addr,
-       __unused int have_addr,
-       __unused db_expr_t count,
-       __unused char *modif)
+void ml_timer_evaluate(void) {
+	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+	uint64_t te_end, te_start = mach_absolute_time();
+	simple_lock(&ml_timer_evaluation_slock);
+	ml_timer_evaluation_in_progress = TRUE;
+	thread_call_delayed_timer_rescan_all();
+	mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
+	ml_timer_evaluation_in_progress = FALSE;
+	ml_timer_eager_evaluations++;
+	te_end = mach_absolute_time();
+	ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
+	simple_unlock(&ml_timer_evaluation_slock);
+
+	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_END, 0, 0, 0, 0, 0);
+}
+
+boolean_t
+ml_timer_forced_evaluation(void) {
+	return ml_timer_evaluation_in_progress;
+}
+
+/* 32-bit right-rotate n bits */
+static inline uint32_t ror32(uint32_t val, const unsigned int n)
+{	
+	__asm__ volatile("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n));
+	return val;
+}
+
+void
+ml_entropy_collect(void)
 {
+	uint32_t	tsc_lo, tsc_hi;
+	uint32_t	*ep;
 
-	uint32_t        i, msrlow, msrhigh;
+	assert(cpu_number() == master_cpu);
 
-	/* Try all of the first 4096 msrs */
-	for (i = 0; i < 4096; i++) {
-		if (!rdmsr_carefully(i, &msrlow, &msrhigh)) {
-			db_printf("%08X - %08X.%08X\n", i, msrhigh, msrlow);
-		}
-	}
+	/* update buffer pointer cyclically */
+	if (EntropyData.index_ptr - EntropyData.buffer == ENTROPY_BUFFER_SIZE)
+		ep = EntropyData.index_ptr = EntropyData.buffer;
+	else
+		ep = EntropyData.index_ptr++;
 
-	/* Try all of the 4096 msrs at 0x0C000000 */
-	for (i = 0; i < 4096; i++) {
-		if (!rdmsr_carefully(0x0C000000 | i, &msrlow, &msrhigh)) {
-			db_printf("%08X - %08X.%08X\n",
-				0x0C000000 | i, msrhigh, msrlow);
-		}
-	}
+	rdtsc_nofence(tsc_lo, tsc_hi);
+	*ep = ror32(*ep, 9) ^ tsc_lo;
+}
 
-	/* Try all of the 4096 msrs at 0xC0000000 */
-	for (i = 0; i < 4096; i++) {
-		if (!rdmsr_carefully(0xC0000000 | i, &msrlow, &msrhigh)) {
-			db_printf("%08X - %08X.%08X\n",
-				0xC0000000 | i, msrhigh, msrlow);
-		}
-	}
+void
+ml_gpu_stat_update(uint64_t gpu_ns_delta) {
+	current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
 }
 
-#endif
+uint64_t
+ml_gpu_stat(thread_t t) {
+	return t->machine.thread_gpu_ns;
+}