]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/machine_routines.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / osfmk / i386 / machine_routines.c
index d42f6d2f1f9aaf71361ac24d07b510a6b865b8b2..76652946c7e98a76dce65f850c51356d66228ddb 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <mach/processor.h>
 #include <kern/processor.h>
 #include <kern/machine.h>
 #include <mach/processor.h>
 #include <kern/processor.h>
 #include <kern/machine.h>
-#include <kern/cpu_data.h>
+
 #include <kern/cpu_number.h>
 #include <kern/thread.h>
 #include <kern/cpu_number.h>
 #include <kern/thread.h>
-#include <i386/cpu_data.h>
+#include <kern/thread_call.h>
+#include <kern/policy_internal.h>
+
+#include <prng/random.h>
 #include <i386/machine_cpu.h>
 #include <i386/lapic.h>
 #include <i386/machine_cpu.h>
 #include <i386/lapic.h>
+#include <i386/bit_routines.h>
 #include <i386/mp_events.h>
 #include <i386/mp_events.h>
-#include <i386/pmap.h>
-#include <i386/misc_protos.h>
 #include <i386/pmCPU.h>
 #include <i386/pmCPU.h>
-#include <i386/proc_reg.h>
+#include <i386/trap.h>
 #include <i386/tsc.h>
 #include <i386/cpu_threads.h>
 #include <i386/tsc.h>
 #include <i386/cpu_threads.h>
+#include <i386/proc_reg.h>
 #include <mach/vm_param.h>
 #include <mach/vm_param.h>
-#if MACH_KDB
-#include <i386/db_machdep.h>
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
+#include <i386/pmap.h>
+#include <i386/pmap_internal.h>
+#include <i386/misc_protos.h>
+#include <kern/timer_queue.h>
+#if KPC
+#include <kern/kpc.h>
 #endif
 #endif
-
+#include <architecture/i386/pio.h>
+#include <i386/cpu_data.h>
 #if DEBUG
 #define DBG(x...)      kprintf("DBG: " x)
 #else
 #define DBG(x...)
 #endif
 
 #if DEBUG
 #define DBG(x...)      kprintf("DBG: " x)
 #else
 #define DBG(x...)
 #endif
 
-extern thread_t        Shutdown_context(thread_t thread, void (*doshutdown)(processor_t),processor_t  processor);
 extern void    wakeup(void *);
 extern void    wakeup(void *);
-extern unsigned KernelRelocOffset;
 
 static int max_cpus_initialized = 0;
 
 
 static int max_cpus_initialized = 0;
 
-unsigned int   LockTimeOut;
-unsigned int   LockTimeOutTSC;
-unsigned int   MutexSpin;
+uint64_t       LockTimeOut;
+uint64_t       TLBTimeOut;
+uint64_t       LockTimeOutTSC;
+uint32_t       LockTimeOutUsec;
+uint64_t       MutexSpin;
+uint64_t       LastDebuggerEntryAllowance;
+uint64_t       delay_spin_threshold;
+
+extern uint64_t panic_restart_timeout;
+
+boolean_t virtualized = FALSE;
+
+decl_simple_lock_data(static,  ml_timer_evaluation_slock);
+uint32_t ml_timer_eager_evaluations;
+uint64_t ml_timer_eager_evaluation_max;
+static boolean_t ml_timer_evaluation_in_progress = FALSE;
+
 
 #define MAX_CPUS_SET    0x1
 #define MAX_CPUS_WAIT   0x2
 
 #define MAX_CPUS_SET    0x1
 #define MAX_CPUS_WAIT   0x2
@@ -97,23 +110,20 @@ vm_offset_t ml_static_malloc(
 
 void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
 {
 
 void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
 {
-        *phys_addr = bounce_pool_base;
-       *size      = bounce_pool_size;
+        *phys_addr = 0;
+       *size      = 0;
 }
 
 
 }
 
 
-vm_offset_t
-ml_boot_ptovirt(
-       vm_offset_t paddr)
-{
-       return (vm_offset_t)((paddr-KernelRelocOffset) | LINEAR_KERNEL_ADDRESS);
-} 
-
 vm_offset_t
 ml_static_ptovirt(
        vm_offset_t paddr)
 {
 vm_offset_t
 ml_static_ptovirt(
        vm_offset_t paddr)
 {
-    return (vm_offset_t)((unsigned) paddr | LINEAR_KERNEL_ADDRESS);
+#if defined(__x86_64__)
+       return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
+#else
+       return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
+#endif
 } 
 
 
 } 
 
 
@@ -126,17 +136,17 @@ ml_static_mfree(
        vm_offset_t vaddr,
        vm_size_t size)
 {
        vm_offset_t vaddr,
        vm_size_t size)
 {
-       vm_offset_t vaddr_cur;
+       addr64_t vaddr_cur;
        ppnum_t ppn;
        ppnum_t ppn;
-
-//     if (vaddr < VM_MIN_KERNEL_ADDRESS) return;
+       uint32_t freed_pages = 0;
+       assert(vaddr >= VM_MIN_KERNEL_ADDRESS);
 
        assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */
 
        for (vaddr_cur = vaddr;
 
        assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */
 
        for (vaddr_cur = vaddr;
-            vaddr_cur < round_page_32(vaddr+size);
+            vaddr_cur < round_page_64(vaddr+size);
             vaddr_cur += PAGE_SIZE) {
             vaddr_cur += PAGE_SIZE) {
-               ppn = pmap_find_phys(kernel_pmap, (addr64_t)vaddr_cur);
+               ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
                if (ppn != (vm_offset_t)NULL) {
                        kernel_pmap->stats.resident_count++;
                        if (kernel_pmap->stats.resident_count >
                if (ppn != (vm_offset_t)NULL) {
                        kernel_pmap->stats.resident_count++;
                        if (kernel_pmap->stats.resident_count >
@@ -144,11 +154,22 @@ ml_static_mfree(
                                kernel_pmap->stats.resident_max =
                                        kernel_pmap->stats.resident_count;
                        }
                                kernel_pmap->stats.resident_max =
                                        kernel_pmap->stats.resident_count;
                        }
-                       pmap_remove(kernel_pmap, (addr64_t)vaddr_cur, (addr64_t)(vaddr_cur+PAGE_SIZE));
-                       vm_page_create(ppn,(ppn+1));
-                       vm_page_wire_count--;
+                       pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur+PAGE_SIZE);
+                       assert(pmap_valid_page(ppn));
+                       if (IS_MANAGED_PAGE(ppn)) {
+                               vm_page_create(ppn,(ppn+1));
+                               freed_pages++;
+                       }
                }
        }
                }
        }
+       vm_page_lockspin_queues();
+       vm_page_wire_count -= freed_pages;
+       vm_page_wire_count_initial -= freed_pages;
+       vm_page_unlock_queues();
+
+#if    DEBUG   
+       kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
+#endif
 }
 
 
 }
 
 
@@ -156,7 +177,7 @@ ml_static_mfree(
 vm_offset_t ml_vtophys(
        vm_offset_t vaddr)
 {
 vm_offset_t ml_vtophys(
        vm_offset_t vaddr)
 {
-       return  kvtophys(vaddr);
+       return  (vm_offset_t)kvtophys(vaddr);
 }
 
 /*
 }
 
 /*
@@ -182,11 +203,11 @@ vm_size_t ml_nofault_copy(
                        break;
                if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src)))
                        break;
                        break;
                if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src)))
                        break;
-               count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
+               count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
                if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK)))
                if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK)))
-                       count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
+                       count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
                if (count > size)
                if (count > size)
-                       count = size;
+                       count = (uint32_t)size;
 
                bcopy_phys(cur_phys_src, cur_phys_dst, count);
 
 
                bcopy_phys(cur_phys_src, cur_phys_dst, count);
 
@@ -199,6 +220,38 @@ vm_size_t ml_nofault_copy(
        return nbytes;
 }
 
        return nbytes;
 }
 
+/*
+ *     Routine:        ml_validate_nofault
+ *     Function: Validate that the address range has valid translations
+ *                     in the kernel pmap.  If translations are present, they are
+ *                     assumed to be wired; i.e. no attempt is made to guarantee
+ *                     that the translations persist after the check.
+ *                     The range is walked in chunks that stop at a page
+ *                     boundary: each chunk's start address is translated
+ *                     with kvtophys() and the resulting page number is
+ *                     checked with pmap_valid_page().
+ *     Parameters: virtsrc - first virtual address of the range.
+ *                     size    - length of the range in bytes; a zero-length
+ *                               range is reported as valid.
+ *     Returns: TRUE if the range is mapped and will not cause a fault,
+ *                     FALSE otherwise.
+ */
+
+boolean_t ml_validate_nofault(
+       vm_offset_t virtsrc, vm_size_t size)
+{
+       addr64_t cur_phys_src;
+       uint32_t count;
+
+       while (size > 0) {
+               if (!(cur_phys_src = kvtophys(virtsrc)))        /* a zero result is treated as "no translation" */
+                       return FALSE;
+               if (!pmap_valid_page(i386_btop(cur_phys_src)))
+                       return FALSE;
+               count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));     /* bytes from here to the end of this page */
+               if (count > size)
+                       count = (uint32_t)size; /* final chunk: do not step past the end of the range */
+
+               virtsrc += count;
+               size -= count;
+       }
+
+       return TRUE;
+}
+
 /* Interrupt handling */
 
 /* Initialize Interrupts */
 /* Interrupt handling */
 
 /* Initialize Interrupts */
@@ -207,39 +260,40 @@ void ml_init_interrupt(void)
        (void) ml_set_interrupts_enabled(TRUE);
 }
 
        (void) ml_set_interrupts_enabled(TRUE);
 }
 
+
 /* Get Interrupts Enabled */
 boolean_t ml_get_interrupts_enabled(void)
 {
   unsigned long flags;
 
 /* Get Interrupts Enabled */
 boolean_t ml_get_interrupts_enabled(void)
 {
   unsigned long flags;
 
-  __asm__ volatile("pushf; popl        %0" :  "=r" (flags));
+  __asm__ volatile("pushf; pop %0" :  "=r" (flags));
   return (flags & EFL_IF) != 0;
 }
 
 /* Set Interrupts Enabled */
 boolean_t ml_set_interrupts_enabled(boolean_t enable)
 {
   return (flags & EFL_IF) != 0;
 }
 
 /* Set Interrupts Enabled */
 boolean_t ml_set_interrupts_enabled(boolean_t enable)
 {
-  unsigned long flags;
+       unsigned long flags;
+       boolean_t istate;
+       
+       __asm__ volatile("pushf; pop    %0" :  "=r" (flags));
 
 
-  __asm__ volatile("pushf; popl        %0" :  "=r" (flags));
+       assert(get_interrupt_level() ? (enable == FALSE) : TRUE);
 
 
-  if (enable) {
-       ast_t           *myast;
+       istate = ((flags & EFL_IF) != 0);
 
 
-       myast = ast_pending();
+       if (enable) {
+               __asm__ volatile("sti;nop");
 
 
-       if ( (get_preemption_level() == 0) &&  (*myast & AST_URGENT) ) {
-       __asm__ volatile("sti");
-          __asm__ volatile ("int $0xff");
-        } else {
-         __asm__ volatile ("sti");
+               if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT))
+                       __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
+       }
+       else {
+               if (istate)
+                       __asm__ volatile("cli");
        }
        }
-  }
-  else {
-       __asm__ volatile("cli");
-  }
 
 
-  return (flags & EFL_IF) != 0;
+       return istate;
 }
 
 /* Check if running at interrupt context */
 }
 
 /* Check if running at interrupt context */
@@ -248,26 +302,40 @@ boolean_t ml_at_interrupt_context(void)
        return get_interrupt_level() != 0;
 }
 
        return get_interrupt_level() != 0;
 }
 
+void ml_get_power_state(boolean_t *icp, boolean_t *pidlep) {
+       *icp = (get_interrupt_level() != 0);    /* out: TRUE when the interrupt level is non-zero */
+       /* out: *pidlep is TRUE when the num_idle count of the current CPU's
+        * package equals topoParms.nLThreadsPerPackage.
+        * These will be technically inaccurate for interrupts that occur
+        * successively within a single "idle exit" event, but shouldn't
+        * matter statistically.
+        */
+       *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
+}
+
 /* Generate a fake interrupt */
 void ml_cause_interrupt(void)
 {
        panic("ml_cause_interrupt not defined yet on Intel");
 }
 
 /* Generate a fake interrupt */
 void ml_cause_interrupt(void)
 {
        panic("ml_cause_interrupt not defined yet on Intel");
 }
 
+/*
+ * TODO: transition users of this to kernel_thread_start_priority
+ * ml_thread_policy is an unsupported KPI
+ */
 void ml_thread_policy(
        thread_t thread,
 __unused       unsigned policy_id,
        unsigned policy_info)
 {
        if (policy_info & MACHINE_NETWORK_WORKLOOP) {
 void ml_thread_policy(
        thread_t thread,
 __unused       unsigned policy_id,
        unsigned policy_info)
 {
        if (policy_info & MACHINE_NETWORK_WORKLOOP) {
-               spl_t           s = splsched();
-
-               thread_lock(thread);
+               thread_precedence_policy_data_t info;
+               __assert_only kern_return_t kret;
 
 
-               set_priority(thread, thread->priority + 1);
+               info.importance = 1;
 
 
-               thread_unlock(thread);
-               splx(s);
+               kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
+                                                               (thread_policy_t)&info,
+                                                               THREAD_PRECEDENCE_POLICY_COUNT);
+               assert(kret == KERN_SUCCESS);
        }
 }
 
        }
 }
 
@@ -296,27 +364,28 @@ void
 machine_signal_idle(
         processor_t processor)
 {
 machine_signal_idle(
         processor_t processor)
 {
-       cpu_interrupt(PROCESSOR_DATA(processor, slot_num));
+       cpu_interrupt(processor->cpu_id);
 }
 
 }
 
-thread_t        
-machine_processor_shutdown(
-       thread_t        thread,
-       void            (*doshutdown)(processor_t),
-       processor_t     processor)
+void
+machine_signal_idle_deferred(
+       __unused processor_t processor)
 {
 {
-       vmx_suspend();
-       fpu_save_context(thread);
-       return(Shutdown_context(thread, doshutdown, processor));
+       panic("Unimplemented");
 }
 
 }
 
-kern_return_t
-ml_processor_register(
-       cpu_id_t        cpu_id,
-       uint32_t        lapic_id,
-       processor_t     *processor_out,
-       ipi_handler_t   *ipi_handler,
-       boolean_t       boot_cpu)
+void
+machine_signal_idle_cancel(
+       __unused processor_t processor)
+{
+       panic("Unimplemented"); /* not implemented here: every call panics; the processor argument is unused */
+}
+
+static kern_return_t
+register_cpu(
+        uint32_t        lapic_id,
+       processor_t     *processor_out,
+       boolean_t       boot_cpu )
 {
        int             target_cpu;
        cpu_data_t      *this_cpu_datap;
 {
        int             target_cpu;
        cpu_data_t      *this_cpu_datap;
@@ -331,7 +400,9 @@ ml_processor_register(
 
        lapic_cpu_map(lapic_id, target_cpu);
 
 
        lapic_cpu_map(lapic_id, target_cpu);
 
-       this_cpu_datap->cpu_id = cpu_id;
+       /* The cpu_id is not known at registration phase. Just do
+        * lapic_id for now 
+        */
        this_cpu_datap->cpu_phys_number = lapic_id;
 
        this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
        this_cpu_datap->cpu_phys_number = lapic_id;
 
        this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
@@ -342,16 +413,21 @@ ml_processor_register(
        if (this_cpu_datap->cpu_chud == NULL)
                goto failed;
 
        if (this_cpu_datap->cpu_chud == NULL)
                goto failed;
 
+#if KPC
+       if (kpc_register_cpu(this_cpu_datap) != TRUE)
+               goto failed;
+#endif
+
        if (!boot_cpu) {
                cpu_thread_alloc(this_cpu_datap->cpu_number);
                if (this_cpu_datap->lcpu.core == NULL)
                        goto failed;
 
        if (!boot_cpu) {
                cpu_thread_alloc(this_cpu_datap->cpu_number);
                if (this_cpu_datap->lcpu.core == NULL)
                        goto failed;
 
-               pmCPUStateInit();
-
+#if NCOPY_WINDOWS > 0
                this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu);
                if (this_cpu_datap->cpu_pmap == NULL)
                        goto failed;
                this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu);
                if (this_cpu_datap->cpu_pmap == NULL)
                        goto failed;
+#endif
 
                this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu);
                if (this_cpu_datap->cpu_processor == NULL)
 
                this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu);
                if (this_cpu_datap->cpu_processor == NULL)
@@ -364,27 +440,83 @@ ml_processor_register(
        }
 
        *processor_out = this_cpu_datap->cpu_processor;
        }
 
        *processor_out = this_cpu_datap->cpu_processor;
-       *ipi_handler = NULL;
-
-       if (target_cpu == machine_info.max_cpus - 1) {
-               /*
-                * All processors are now registered but not started (except
-                * for this "in-limbo" boot processor). We call to the machine
-                * topology code to finalize and activate the topology.
-                */
-               cpu_topology_start();
-       }
 
        return KERN_SUCCESS;
 
 failed:
        cpu_processor_free(this_cpu_datap->cpu_processor);
 
        return KERN_SUCCESS;
 
 failed:
        cpu_processor_free(this_cpu_datap->cpu_processor);
+#if NCOPY_WINDOWS > 0
        pmap_cpu_free(this_cpu_datap->cpu_pmap);
        pmap_cpu_free(this_cpu_datap->cpu_pmap);
+#endif
        chudxnu_cpu_free(this_cpu_datap->cpu_chud);
        console_cpu_free(this_cpu_datap->cpu_console_buf);
        chudxnu_cpu_free(this_cpu_datap->cpu_chud);
        console_cpu_free(this_cpu_datap->cpu_console_buf);
+#if KPC
+       kpc_unregister_cpu(this_cpu_datap);
+#endif
+
        return KERN_FAILURE;
 }
 
        return KERN_FAILURE;
 }
 
+/*
+ *     Routine:        ml_processor_register
+ *     Function: Two-phase CPU bring-up entry point, selected by 'start'.
+ *                     start == FALSE: count the CPU in num_registered and hand
+ *                     off to register_cpu(); cpu_id is not used in this phase.
+ *                     start != FALSE: on the first such call only, run
+ *                     cpu_topology_sort() over the registered count; then look
+ *                     the CPU up by lapic_id, record cpu_id in its cpu_data_t,
+ *                     run the per-cpu init calls for non-boot CPUs, and start
+ *                     it with cpu_topology_start_cpu().
+ *                     Panics if lapic_id does not resolve to a cpu number.
+ *     Returns: the result of register_cpu() in the first phase, or of
+ *                     cpu_topology_start_cpu() in the second. In the second
+ *                     phase *processor_out is set to the CPU's cpu_processor;
+ *                     in the first, processor_out is passed to register_cpu().
+ *     NOTE(review): done_topo_sort and num_registered are function-local
+ *                     statics updated without a lock here; presumably callers
+ *                     serialize registration - confirm.
+ */
+kern_return_t
+ml_processor_register(
+        cpu_id_t        cpu_id,
+        uint32_t        lapic_id,
+        processor_t     *processor_out,
+        boolean_t       boot_cpu,
+       boolean_t       start )
+{
+    static boolean_t done_topo_sort = FALSE;
+    static uint32_t num_registered = 0;
+
+    /* Register all CPUs first, and track max */
+    if( start == FALSE )
+    {
+       num_registered++;
+
+       DBG( "registering CPU lapic id %d\n", lapic_id );
+
+       return register_cpu( lapic_id, processor_out, boot_cpu );
+    }
+
+    /* Sort by topology before we start anything */
+    if( !done_topo_sort )
+    {
+       DBG( "about to start CPUs. %d registered\n", num_registered );
+
+       cpu_topology_sort( num_registered );
+       done_topo_sort = TRUE;
+    }
+
+    /* Assign the cpu ID */
+    uint32_t cpunum = -1;
+    cpu_data_t *this_cpu_datap = NULL;
+
+    /* find cpu num and pointer */
+    cpunum = ml_get_cpuid( lapic_id );
+
+    if( cpunum == 0xFFFFFFFF ) /* never heard of it? */
+       panic( "trying to start invalid/unregistered CPU %d\n", lapic_id );
+
+    this_cpu_datap = cpu_datap(cpunum);
+
+    /* fix the CPU id */
+    this_cpu_datap->cpu_id = cpu_id;
+
+    /* allocate and initialize other per-cpu structures */
+    if (!boot_cpu) {
+       mp_cpus_call_cpu_init(cpunum);
+       prng_cpu_init(cpunum);
+    }
+
+    /* output arg */
+    *processor_out = this_cpu_datap->cpu_processor;
+
+    /* OK, try and start this CPU */
+    return cpu_topology_start_cpu( cpunum );
+}
+
+
 void
 ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
 {
 void
 ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
 {
@@ -398,8 +530,11 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
         * Are we supporting MMX/SSE/SSE2/SSE3?
         * As distinct from whether the cpu has these capabilities.
         */
         * Are we supporting MMX/SSE/SSE2/SSE3?
         * As distinct from whether the cpu has these capabilities.
         */
-       os_supports_sse = get_cr4() & CR4_XMM;
-       if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
+       os_supports_sse = !!(get_cr4() & CR4_OSXMM);
+
+       if (ml_fpu_avx_enabled())
+               cpu_infop->vector_unit = 9;
+       else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
                cpu_infop->vector_unit = 8;
        else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse)
                cpu_infop->vector_unit = 7;
                cpu_infop->vector_unit = 8;
        else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse)
                cpu_infop->vector_unit = 7;
@@ -454,7 +589,7 @@ ml_init_max_cpus(unsigned long max_cpus)
                         * that the kernel supports or that the "cpus="
                         * boot-arg has set. Here we take the minimum.
                         */
                         * that the kernel supports or that the "cpus="
                         * boot-arg has set. Here we take the minimum.
                         */
-                        machine_info.max_cpus = MIN(max_cpus, max_ncpus);
+                        machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
                }
                 if (max_cpus_initialized == MAX_CPUS_WAIT)
                         wakeup((event_t)&max_cpus_initialized);
                }
                 if (max_cpus_initialized == MAX_CPUS_WAIT)
                         wakeup((event_t)&max_cpus_initialized);
@@ -477,7 +612,6 @@ ml_get_max_cpus(void)
         (void) ml_set_interrupts_enabled(current_state);
         return(machine_info.max_cpus);
 }
         (void) ml_set_interrupts_enabled(current_state);
         return(machine_info.max_cpus);
 }
-
 /*
  *     Routine:        ml_init_lock_timeout
  *     Function:
 /*
  *     Routine:        ml_init_lock_timeout
  *     Function:
@@ -486,12 +620,49 @@ void
 ml_init_lock_timeout(void)
 {
        uint64_t        abstime;
 ml_init_lock_timeout(void)
 {
        uint64_t        abstime;
-       uint32_t        mtxspin; 
+       uint32_t        mtxspin;
+#if DEVELOPMENT || DEBUG
+       uint64_t        default_timeout_ns = NSEC_PER_SEC>>2;
+#else
+       uint64_t        default_timeout_ns = NSEC_PER_SEC>>1;
+#endif
+       uint32_t        slto;
+       uint32_t        prt;
+
+       if (PE_parse_boot_argn("slto_us", &slto, sizeof (slto)))
+               default_timeout_ns = slto * NSEC_PER_USEC;
 
 
-       /* LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks */
-       nanoseconds_to_absolutetime(NSEC_PER_SEC>>2, &abstime);
-       LockTimeOut = (uint32_t) abstime;
-       LockTimeOutTSC = (uint32_t) tmrCvt(abstime, tscFCvtn2t);
+       /*
+        * LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks,
+        * and LockTimeOutUsec is in microseconds and it's 32-bits.
+        */
+       LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
+       nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
+       LockTimeOut = abstime;
+       LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);
+
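+       /*
+        * TLBTimeOut dictates the TLB flush timeout period. It defaults to
+        * LockTimeOut but can be overridden separately. In particular, a
+        * zero value inhibits the timeout-panic and cuts a trace event instead
+        * - see pmap_flush_tlbs().
+        * NOTE(review): TLBTimeOut is declared uint64_t, but the override
+        * below stores abstime through a (uint32_t) cast, so a large
+        * "tlbto_us" value is silently truncated.
+        */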
+       if (PE_parse_boot_argn("tlbto_us", &slto, sizeof (slto))) {
+               default_timeout_ns = slto * NSEC_PER_USEC;
+               nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
+               TLBTimeOut = (uint32_t) abstime;
+       } else {
+               TLBTimeOut = LockTimeOut;
+       }
+
+#if DEVELOPMENT || DEBUG
+       reportphyreaddelayabs = LockTimeOut;
+#endif
+       if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof (slto))) {
+               default_timeout_ns = slto * NSEC_PER_USEC;
+               nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
+               reportphyreaddelayabs = abstime;
+       }
 
        if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
                if (mtxspin > USEC_PER_SEC>>4)
 
        if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
                if (mtxspin > USEC_PER_SEC>>4)
@@ -501,10 +672,65 @@ ml_init_lock_timeout(void)
                nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime);
        }
        MutexSpin = (unsigned int)abstime;
                nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime);
        }
        MutexSpin = (unsigned int)abstime;
+
+       nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
+       if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof (prt)))
+               nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
+
+       virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
+       if (virtualized) {
+               int     vti;
+               
+               if (!PE_parse_boot_argn("vti", &vti, sizeof (vti)))
+                       vti = 6;
+               printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
+               kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
+#define VIRTUAL_TIMEOUT_INFLATE64(_timeout)                    \
+MACRO_BEGIN                                                    \
+       kprintf("%24s: 0x%016llx ", #_timeout, _timeout);       \
+       _timeout <<= vti;                                       \
+       kprintf("-> 0x%016llx\n",  _timeout);                   \
+MACRO_END
+#define VIRTUAL_TIMEOUT_INFLATE32(_timeout)                    \
+MACRO_BEGIN                                                    \
+       kprintf("%24s:         0x%08x ", #_timeout, _timeout);  \
+       if ((_timeout <<vti) >> vti == _timeout)                \
+               _timeout <<= vti;                               \
+       else                                                    \
+               _timeout = ~0; /* cap rather than overflow */   \
+       kprintf("-> 0x%08x\n",  _timeout);                      \
+MACRO_END
+               VIRTUAL_TIMEOUT_INFLATE32(LockTimeOutUsec);
+               VIRTUAL_TIMEOUT_INFLATE64(LockTimeOut);
+               VIRTUAL_TIMEOUT_INFLATE64(LockTimeOutTSC);
+               VIRTUAL_TIMEOUT_INFLATE64(TLBTimeOut);
+               VIRTUAL_TIMEOUT_INFLATE64(MutexSpin);
+               VIRTUAL_TIMEOUT_INFLATE64(reportphyreaddelayabs);
+       }
+
+       interrupt_latency_tracker_setup();
+       simple_lock_init(&ml_timer_evaluation_slock, 0);
 }
 
 /*
 }
 
 /*
- * This is called from the machine-independent routine cpu_up()
+ * Threshold above which we should attempt to block
+ * instead of spinning for clock_delay_until().
+ */
+/*
+ * Set delay_spin_threshold from threshold_us (microseconds): the value is
+ * scaled by NSEC_PER_USEC and converted with nanoseconds_to_absolutetime().
+ * NOTE(review): threshold_us is a signed int; if NSEC_PER_USEC is an
+ * unsigned 64-bit constant, a negative argument would presumably become a
+ * very large threshold - confirm.
+ */
+void
+ml_init_delay_spin_threshold(int threshold_us)
+{
+       nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
+}
+/*
+ * Returns TRUE when 'interval' is strictly below delay_spin_threshold and
+ * FALSE otherwise (including when the two are equal).
+ * 'interval' is presumably in the same absolute-time units in which the
+ * threshold is stored - confirm against callers.
+ */
+boolean_t
+ml_delay_should_spin(uint64_t interval)
+{
+       return (interval < delay_spin_threshold) ? TRUE : FALSE;
+}
+
+/*
+ * This is called from the machine-independent layer
  * to perform machine-dependent info updates. Defer to cpu_thread_init().
  */
 void
  * to perform machine-dependent info updates. Defer to cpu_thread_init().
  */
 void
@@ -514,12 +740,14 @@ ml_cpu_up(void)
 }
 
 /*
 }
 
 /*
- * This is called from the machine-independent routine cpu_down()
+ * This is called from the machine-independent layer
  * to perform machine-dependent info updates.
  */
 void
 ml_cpu_down(void)
 {
  * to perform machine-dependent info updates.
  */
 void
 ml_cpu_down(void)
 {
+       i386_deactivate_cpu();
+
        return;
 }
 
        return;
 }
 
@@ -570,14 +798,8 @@ void ml_cpu_set_ldt(int selector)
            current_cpu_datap()->cpu_ldt == KERNEL_LDT)
                return;
 
            current_cpu_datap()->cpu_ldt == KERNEL_LDT)
                return;
 
-       /*
-        * If 64bit this requires a mode switch (and back). 
-        */
-       if (cpu_mode_is64bit())
-               ml_64bit_lldt(selector);
-       else
-               lldt(selector);
-       current_cpu_datap()->cpu_ldt = selector;        
+       lldt(selector);
+       current_cpu_datap()->cpu_ldt = selector;
 }
 
 void ml_fp_setvalid(boolean_t value)
 }
 
 void ml_fp_setvalid(boolean_t value)
@@ -590,45 +812,120 @@ uint64_t ml_cpu_int_event_time(void)
        return current_cpu_datap()->cpu_int_event_time;
 }
 
        return current_cpu_datap()->cpu_int_event_time;
 }
 
+vm_offset_t ml_stack_remaining(void)
+{
+       uintptr_t local = (uintptr_t) &local;
 
 
-#if MACH_KDB
+       if (ml_at_interrupt_context() != 0) {
+           return (local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE));
+       } else {
+           return (local - current_thread()->kernel_stack);
+       }
+}
 
 
-/*
- *     Display the global msrs
- * *           
- *     ms
- */
-void 
-db_msr(__unused db_expr_t addr,
-       __unused int have_addr,
-       __unused db_expr_t count,
-       __unused char *modif)
+void
+kernel_preempt_check(void)
 {
 {
+       boolean_t       intr;
+       unsigned long flags;
 
 
-       uint32_t        i, msrlow, msrhigh;
+       assert(get_preemption_level() == 0);
 
 
-       /* Try all of the first 4096 msrs */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n", i, msrhigh, msrlow);
-               }
-       }
+       __asm__ volatile("pushf; pop    %0" :  "=r" (flags));
 
 
-       /* Try all of the 4096 msrs at 0x0C000000 */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(0x0C000000 | i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n",
-                               0x0C000000 | i, msrhigh, msrlow);
-               }
-       }
+       intr = ((flags & EFL_IF) != 0);
 
 
-       /* Try all of the 4096 msrs at 0xC0000000 */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(0xC0000000 | i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n",
-                               0xC0000000 | i, msrhigh, msrlow);
-               }
+       if ((*ast_pending() & AST_URGENT) && intr == TRUE) {
+               /*
+                * can handle interrupts and preemptions 
+                * at this point
+                */
+
+               /*
+                * now cause the PRE-EMPTION trap
+                */
+               __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
        }
 }
 
        }
 }
 
-#endif
+boolean_t machine_timeout_suspended(void) {    /* non-zero if any one of the five conditions tested below holds */
+       return (pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake());
+}
+
+/* Eagerly evaluate all pending timer and thread callouts.
+ *
+ * Holding ml_timer_evaluation_slock, this sets
+ * ml_timer_evaluation_in_progress, calls
+ * thread_call_delayed_timer_rescan_all(), requests
+ * timer_queue_expire_rescan on CPUMASK_ALL via mp_cpus_call(), then
+ * clears the flag. It also bumps ml_timer_eager_evaluations and keeps
+ * the largest observed mach_absolute_time() delta in
+ * ml_timer_eager_evaluation_max. The whole operation is bracketed by
+ * DECR_TIMER_RESCAN start/end trace points.
+ * NOTE(review): the cross-CPU call is issued with ASYNC, so the flag is
+ * presumably cleared without waiting for the other CPUs - confirm.
+ */
+void ml_timer_evaluate(void) {
+       KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+       uint64_t te_end, te_start = mach_absolute_time();
+       simple_lock(&ml_timer_evaluation_slock);
+       ml_timer_evaluation_in_progress = TRUE;
+       thread_call_delayed_timer_rescan_all();
+       mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
+       ml_timer_evaluation_in_progress = FALSE;
+       ml_timer_eager_evaluations++;
+       te_end = mach_absolute_time();
+       ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));        /* te_start was sampled before taking the lock */
+       simple_unlock(&ml_timer_evaluation_slock);
+
+       KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_END, 0, 0, 0, 0, 0);
+}
+/*
+ * Returns the current value of ml_timer_evaluation_in_progress, the flag
+ * that ml_timer_evaluate() sets while it runs. The flag is read here
+ * without taking ml_timer_evaluation_slock.
+ */
+boolean_t
+ml_timer_forced_evaluation(void) {
+       return ml_timer_evaluation_in_progress;
+}
+
+/* 32-bit right-rotate n bits.
+ * Implemented with the x86 "rorl" instruction: the "c" constraint places
+ * the count n in the cl register, and the "0" constraint makes val both
+ * the input and the output operand. Returns the rotated value.
+ */
+static inline uint32_t ror32(uint32_t val, const unsigned int n)
+{      
+       __asm__ volatile("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n));
+       return val;
+}
+/*
+ * Mix the low 32 bits of the timestamp counter into one word of
+ * EntropyData.buffer: the selected word is rotated right by 9 bits and
+ * XORed with tsc_lo. Asserts that it runs on master_cpu.
+ * The slot is chosen through EntropyData.index_ptr, which wraps back to the
+ * start of the buffer once it is ENTROPY_BUFFER_SIZE elements past it.
+ * NOTE(review): the wrap branch resets index_ptr without advancing it, so
+ * slot 0 is used by two consecutive calls; looks harmless - confirm intent.
+ */
+void
+ml_entropy_collect(void)
+{
+       uint32_t        tsc_lo, tsc_hi; /* tsc_hi is read but not used afterwards */
+       uint32_t        *ep;
+
+       assert(cpu_number() == master_cpu);
+
+       /* update buffer pointer cyclically */
+       if (EntropyData.index_ptr - EntropyData.buffer == ENTROPY_BUFFER_SIZE)
+               ep = EntropyData.index_ptr = EntropyData.buffer;
+       else
+               ep = EntropyData.index_ptr++;
+
+       rdtsc_nofence(tsc_lo, tsc_hi);
+       *ep = ror32(*ep, 9) ^ tsc_lo;
+}
+/*
+ * Always returns 0; the thread argument is unused.
+ */
+uint64_t
+ml_energy_stat(__unused thread_t t) {
+       return 0;
+}
+/*
+ * Adds gpu_ns_delta to the machine.thread_gpu_ns counter of the thread
+ * returned by current_thread().
+ */
+void
+ml_gpu_stat_update(uint64_t gpu_ns_delta) {
+       current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
+}
+/*
+ * Returns the machine.thread_gpu_ns counter of thread t.
+ */
+uint64_t
+ml_gpu_stat(thread_t t) {
+       return t->machine.thread_gpu_ns;
+}
+
+int plctrace_enabled = 0;
+/* Callable-function form that forwards to disable_preemption_internal(). */
+void _disable_preemption(void) {
+       disable_preemption_internal();
+}
+/* Callable-function form that forwards to enable_preemption_internal(). */
+void _enable_preemption(void) {
+       enable_preemption_internal();
+}
+/* Clears the plctrace_enabled flag (sets it to 0). */
+void plctrace_disable(void) {
+       plctrace_enabled = 0;
+}