xnu-2422.100.13.tar.gz

[apple/xnu.git] / osfmk / i386 / machine_routines.c
diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c

index 24dc7afa8b15eac984583d094194222bace51d2e..06c57561c6401af357db22da96236f2a68d89799 100644 (file)
--- a/osfmk/i386/machine_routines.c
+++ b/osfmk/i386/machine_routines.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
@@ -36,25 +36,23 @@
  #include <kern/cpu_data.h>
  #include <kern/cpu_number.h>
  #include <kern/thread.h>
+#include <kern/thread_call.h>
  #include <i386/machine_cpu.h>
  #include <i386/lapic.h>
+#include <i386/lock.h>
  #include <i386/mp_events.h>
  #include <i386/pmCPU.h>
+#include <i386/trap.h>
  #include <i386/tsc.h>
  #include <i386/cpu_threads.h>
  #include <i386/proc_reg.h>
  #include <mach/vm_param.h>
  #include <i386/pmap.h>
+#include <i386/pmap_internal.h>
  #include <i386/misc_protos.h>
-#if MACH_KDB
-#include <machine/db_machdep.h>
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
+#include <kern/timer_queue.h>
+#if KPC
+#include <kern/kpc.h>
  #endif
  
  #if DEBUG
@@ -63,7 +61,6 @@
  #define DBG(x...)
  #endif
  
-
  extern void    wakeup(void *);
  
  static int max_cpus_initialized = 0;
@@ -72,6 +69,17 @@ unsigned int LockTimeOut;
  unsigned int   LockTimeOutTSC;
  unsigned int   MutexSpin;
  uint64_t       LastDebuggerEntryAllowance;
+uint64_t       delay_spin_threshold;
+
+extern uint64_t panic_restart_timeout;
+
+boolean_t virtualized = FALSE;
+
+decl_simple_lock_data(static,  ml_timer_evaluation_slock);
+uint32_t ml_timer_eager_evaluations;
+uint64_t ml_timer_eager_evaluation_max;
+static boolean_t ml_timer_evaluation_in_progress = FALSE;
+
  
  #define MAX_CPUS_SET    0x1
  #define MAX_CPUS_WAIT   0x2
@@ -96,8 +104,8 @@ vm_offset_t ml_static_malloc(
  
  void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
  {
-        *phys_addr = bounce_pool_base;
-       *size      = bounce_pool_size;
+        *phys_addr = 0;
+       *size      = 0;
  }
  
  
@@ -124,14 +132,13 @@ ml_static_mfree(
  {
         addr64_t vaddr_cur;
         ppnum_t ppn;
-
+       uint32_t freed_pages = 0;
         assert(vaddr >= VM_MIN_KERNEL_ADDRESS);
  
         assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */
  
-
         for (vaddr_cur = vaddr;
-            vaddr_cur < round_page_64(vaddr+size);
+            vaddr_cur < round_page_64(vaddr+size);
              vaddr_cur += PAGE_SIZE) {
                 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
                 if (ppn != (vm_offset_t)NULL) {
@@ -142,10 +149,18 @@ ml_static_mfree(
                                         kernel_pmap->stats.resident_count;
                         }
                         pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur+PAGE_SIZE);
-                       vm_page_create(ppn,(ppn+1));
-                       vm_page_wire_count--;
+                       assert(pmap_valid_page(ppn));
+
+                       if (IS_MANAGED_PAGE(ppn)) {
+                               vm_page_create(ppn,(ppn+1));
+                               vm_page_wire_count--;
+                               freed_pages++;
+                       }
                 }
         }
+#if    DEBUG   
+       kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
+#endif
  }
  
  
@@ -196,6 +211,38 @@ vm_size_t ml_nofault_copy(
         return nbytes;
  }
  
+/*
+ *     Routine:        ml_validate_nofault
+ *     Function: Validate that the address range has valid translations
+ *                     in the kernel pmap.  If translations are present, they are
+ *                     assumed to be wired; i.e. no attempt is made to guarantee
+ *                     that the translations persist after the check.
+ *  Returns: TRUE if the range is mapped and will not cause a fault,
+ *                     FALSE otherwise.
+ */
+
+boolean_t ml_validate_nofault(
+       vm_offset_t virtsrc, vm_size_t size)
+{
+       /* Walk the range page by page; the first page that fails a check ends the walk. */
+       while (size != 0) {
+               addr64_t paddr = kvtophys(virtsrc);
+               uint32_t chunk;
+
+               /* No translation, or a physical page rejected by pmap_valid_page(). */
+               if (paddr == 0 || !pmap_valid_page(i386_btop(paddr)))
+                       return FALSE;
+
+               /* Bytes left in this page, clamped to what remains of the range. */
+               chunk = (uint32_t)(PAGE_SIZE - (paddr & PAGE_MASK));
+               if (chunk > size)
+                       chunk = (uint32_t)size;
+               virtsrc += chunk;
+               size -= chunk;
+       }
+       return TRUE;
+}
+
  /* Interrupt handling */
  
  /* Initialize Interrupts */
@@ -205,7 +252,6 @@ void ml_init_interrupt(void)
  }
  
  
-
  /* Get Interrupts Enabled */
  boolean_t ml_get_interrupts_enabled(void)
  {
@@ -218,27 +264,27 @@ boolean_t ml_get_interrupts_enabled(void)
  /* Set Interrupts Enabled */
  boolean_t ml_set_interrupts_enabled(boolean_t enable)
  {
-  unsigned long flags;
+       unsigned long flags;
+       boolean_t istate;
+       
+       __asm__ volatile("pushf; pop    %0" :  "=r" (flags));
  
-  __asm__ volatile("pushf; pop %0" :  "=r" (flags));
+       assert(get_interrupt_level() ? (enable == FALSE) : TRUE);
  
-  if (enable) {
-       ast_t           *myast;
+       istate = ((flags & EFL_IF) != 0);
  
-       myast = ast_pending();
+       if (enable) {
+               __asm__ volatile("sti;nop");
  
-       if ( (get_preemption_level() == 0) &&  (*myast & AST_URGENT) ) {
-       __asm__ volatile("sti");
-          __asm__ volatile ("int $0xff");
-        } else {
-         __asm__ volatile ("sti");
+               if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT))
+                       __asm__ volatile ("int $0xff");
+       }
+       else {
+               if (istate)
+                       __asm__ volatile("cli");
         }
-  }
-  else {
-       __asm__ volatile("cli");
-  }
  
-  return (flags & EFL_IF) != 0;
+       return istate;
  }
  
  /* Check if running at interrupt context */
@@ -247,6 +293,15 @@ boolean_t ml_at_interrupt_context(void)
         return get_interrupt_level() != 0;
  }
  
+void ml_get_power_state(boolean_t *icp, boolean_t *pidlep) {
+       /*
+        * These values can be technically inaccurate for interrupts arriving
+        * back-to-back within one "idle exit" event; it shouldn't matter statistically.
+        */
+       *icp = (get_interrupt_level() != 0);
+       *pidlep = (topoParms.nLThreadsPerPackage == current_cpu_datap()->lcpu.package->num_idle);
+}
+
  /* Generate a fake interrupt */
  void ml_cause_interrupt(void)
  {
@@ -330,6 +385,23 @@ register_cpu(
         if (this_cpu_datap->cpu_chud == NULL)
                 goto failed;
  
+#if KPC
+       this_cpu_datap->cpu_kpc_buf[0] = kpc_counterbuf_alloc();
+       if(this_cpu_datap->cpu_kpc_buf[0] == NULL )
+               goto failed;
+       this_cpu_datap->cpu_kpc_buf[1] = kpc_counterbuf_alloc();
+       if(this_cpu_datap->cpu_kpc_buf[1] == NULL )
+               goto failed;
+
+       this_cpu_datap->cpu_kpc_shadow = kpc_counterbuf_alloc();
+       if(this_cpu_datap->cpu_kpc_shadow == NULL )
+               goto failed;
+
+       this_cpu_datap->cpu_kpc_reload = kpc_counterbuf_alloc();
+       if(this_cpu_datap->cpu_kpc_reload == NULL )
+               goto failed;
+#endif
+
         if (!boot_cpu) {
                 cpu_thread_alloc(this_cpu_datap->cpu_number);
                 if (this_cpu_datap->lcpu.core == NULL)
@@ -362,6 +434,13 @@ failed:
  #endif
         chudxnu_cpu_free(this_cpu_datap->cpu_chud);
         console_cpu_free(this_cpu_datap->cpu_console_buf);
+#if KPC
+       kpc_counterbuf_free(this_cpu_datap->cpu_kpc_buf[0]);
+       kpc_counterbuf_free(this_cpu_datap->cpu_kpc_buf[1]);
+       kpc_counterbuf_free(this_cpu_datap->cpu_kpc_shadow);
+       kpc_counterbuf_free(this_cpu_datap->cpu_kpc_reload);
+#endif
+
         return KERN_FAILURE;
  }
  
@@ -432,8 +511,11 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
          * Are we supporting MMX/SSE/SSE2/SSE3?
          * As distinct from whether the cpu has these capabilities.
          */
-       os_supports_sse = !!(get_cr4() & CR4_XMM);
-       if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
+       os_supports_sse = !!(get_cr4() & CR4_OSXMM);
+
+       if (ml_fpu_avx_enabled())
+               cpu_infop->vector_unit = 9;
+       else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
                 cpu_infop->vector_unit = 8;
         else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse)
                 cpu_infop->vector_unit = 7;
@@ -523,7 +605,8 @@ ml_init_lock_timeout(void)
         uint32_t        mtxspin;
         uint64_t        default_timeout_ns = NSEC_PER_SEC>>2;
         uint32_t        slto;
-       
+       uint32_t        prt;
+
         if (PE_parse_boot_argn("slto_us", &slto, sizeof (slto)))
                 default_timeout_ns = slto * NSEC_PER_USEC;
  
@@ -541,11 +624,33 @@ ml_init_lock_timeout(void)
         }
         MutexSpin = (unsigned int)abstime;
  
-       nanoseconds_to_absolutetime(2 * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
+       nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
+       if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof (prt)))
+               nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
+       virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
+       interrupt_latency_tracker_setup();
+       simple_lock_init(&ml_timer_evaluation_slock, 0);
+}
+
+/*
+ * Threshold above which we should attempt to block
+ * instead of spinning for clock_delay_until().
+ */
+
+void ml_init_delay_spin_threshold(int threshold_us)
+{
+       uint64_t threshold_ns = threshold_us * NSEC_PER_USEC;   /* usec -> nsec */
+       nanoseconds_to_absolutetime(threshold_ns, &delay_spin_threshold);
+}
+
+boolean_t ml_delay_should_spin(uint64_t interval)
+{
+       /* Spin only when the interval is strictly below delay_spin_threshold. */
+       return (delay_spin_threshold > interval) ? TRUE : FALSE;
  }
  
  /*
- * This is called from the machine-independent routine cpu_up()
+ * This is called from the machine-independent layer
   * to perform machine-dependent info updates. Defer to cpu_thread_init().
   */
  void
@@ -555,12 +660,14 @@ ml_cpu_up(void)
  }
  
  /*
- * This is called from the machine-independent routine cpu_down()
+ * This is called from the machine-independent layer
   * to perform machine-dependent info updates.
   */
  void
  ml_cpu_down(void)
  {
+       i386_deactivate_cpu();
+
         return;
  }
  
@@ -611,17 +718,7 @@ void ml_cpu_set_ldt(int selector)
             current_cpu_datap()->cpu_ldt == KERNEL_LDT)
                 return;
  
-#if defined(__i386__)
-       /*
-        * If 64bit this requires a mode switch (and back). 
-        */
-       if (cpu_mode_is64bit())
-               ml_64bit_lldt(selector);
-       else
-               lldt(selector);
-#else
         lldt(selector);
-#endif
         current_cpu_datap()->cpu_ldt = selector;
  }
  
@@ -646,44 +743,55 @@ vm_offset_t ml_stack_remaining(void)
         }
  }
  
-#if MACH_KDB
-
-/*
- *     Display the global msrs
- * *           
- *     ms
- */
-void 
-db_msr(__unused db_expr_t addr,
-       __unused int have_addr,
-       __unused db_expr_t count,
-       __unused char *modif)
+void
+kernel_preempt_check(void)
  {
+       boolean_t       intr;
+       unsigned long flags;
  
-       uint32_t        i, msrlow, msrhigh;
+       assert(get_preemption_level() == 0);
  
-       /* Try all of the first 4096 msrs */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n", i, msrhigh, msrlow);
-               }
-       }
+       __asm__ volatile("pushf; pop    %0" :  "=r" (flags));
  
-       /* Try all of the 4096 msrs at 0x0C000000 */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(0x0C000000 | i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n",
-                               0x0C000000 | i, msrhigh, msrlow);
-               }
-       }
+       intr = ((flags & EFL_IF) != 0);
  
-       /* Try all of the 4096 msrs at 0xC0000000 */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(0xC0000000 | i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n",
-                               0xC0000000 | i, msrhigh, msrlow);
-               }
+       if ((*ast_pending() & AST_URGENT) && intr == TRUE) {
+               /*
+                * can handle interrupts and preemptions 
+                * at this point
+                */
+
+               /*
+                * now cause the PRE-EMPTION trap
+                */
+               __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
         }
  }
  
-#endif
+boolean_t machine_timeout_suspended(void) {   /* logical OR of the conditions below, short-circuiting left to right */
+       return (virtualized || pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity());
+}
+
+/* Eagerly evaluate all pending timer and thread callouts
+ */
+void ml_timer_evaluate(void) {
+       KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_START, 0, 0, 0, 0, 0);
+       /* te_start is sampled before the lock is taken, so time spent waiting for it counts toward the duration. */
+       uint64_t te_end, te_start = mach_absolute_time();
+       simple_lock(&ml_timer_evaluation_slock);
+       ml_timer_evaluation_in_progress = TRUE;        /* value returned by ml_timer_forced_evaluation() */
+       thread_call_delayed_timer_rescan_all();
+       mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
+       ml_timer_evaluation_in_progress = FALSE;       /* NOTE(review): the call above is ASYNC -- confirm rescans cannot outlive this flag */
+       ml_timer_eager_evaluations++;                  /* count of completed eager evaluations */
+       te_end = mach_absolute_time();
+       ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));       /* longest evaluation seen so far */
+       simple_unlock(&ml_timer_evaluation_slock);
+       /* The flag and both statistics above were updated while ml_timer_evaluation_slock was held. */
+       KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_END, 0, 0, 0, 0, 0);
+}
+
+boolean_t ml_timer_forced_evaluation(void) {
+       /* TRUE only while ml_timer_evaluate() is rescanning; read here without taking the lock. */
+       return ml_timer_evaluation_in_progress;
+}