]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/cpu_data.h
xnu-4570.71.2.tar.gz
[apple/xnu.git] / osfmk / i386 / cpu_data.h
index 466d62f23021814671e1384c88ef3366fb28f403..32ffc1d83eb61049f0c530b8e4abbd1a7119c762 100644 (file)
 #include <i386/rtclock_protos.h>
 #include <i386/pmCPU.h>
 #include <i386/cpu_topology.h>
+#include <i386/seg.h>
 
 #if CONFIG_VMX
 #include <i386/vmx/vmx_cpu.h>
 #endif
 
+#if MONOTONIC
+#include <machine/monotonic.h>
+#endif /* MONOTONIC */
+
 #include <machine/pal_routines.h>
 
 /*
@@ -72,15 +77,18 @@ typedef struct rtclock_timer {
        boolean_t               has_expired;
 } rtclock_timer_t;
 
-
 typedef struct {
-       struct x86_64_tss       *cdi_ktss;
-       struct __attribute__((packed)) {
-               uint16_t size;
-               void *ptr;
-       } cdi_gdt, cdi_idt;
-       struct fake_descriptor  *cdi_ldt;
-       vm_offset_t             cdi_sstk;
+       /* The 'u' suffixed fields store the double-mapped descriptor addresses */
+       struct x86_64_tss       *cdi_ktssu;
+       struct x86_64_tss       *cdi_ktssb;
+       x86_64_desc_register_t  cdi_gdtu;
+       x86_64_desc_register_t  cdi_gdtb;
+       x86_64_desc_register_t  cdi_idtu;
+       x86_64_desc_register_t  cdi_idtb;
+       struct fake_descriptor  *cdi_ldtu;
+       struct fake_descriptor  *cdi_ldtb;
+       vm_offset_t             cdi_sstku;
+       vm_offset_t             cdi_sstkb;
 } cpu_desc_index_t;
 
 typedef enum {
@@ -107,6 +115,13 @@ typedef    uint8_t         pcid_ref_t;
 #define CPU_RTIME_BINS (12)
 #define CPU_ITIME_BINS (CPU_RTIME_BINS)
 
+#define MAXPLFRAMES (16)
+typedef struct {  /* One preemption-trace record, written by pltrace_internal() */
+       boolean_t pltype;  /* the 'enable' argument passed to pltrace_internal() */
+       int plevel;  /* cpu_preemption_level sampled when the record was written */
+       uint64_t plbt[MAXPLFRAMES];  /* [0] = program counter at record time, [1..] = return addresses from pltrace_bt() */
+} plrecord_t;
+
 /*
  * Per-cpu data.
  *
@@ -120,6 +135,13 @@ typedef    uint8_t         pcid_ref_t;
  * cpu_datap(cpu_number) macro which uses the cpu_data_ptr[] array of per-cpu
  * pointers.
  */
+typedef struct {  /* PCID bookkeeping; cpu_data_t refers to it through its cpu_pcid_data pointer */
+       pcid_t                  cpu_pcid_free_hint;  /* NOTE(review): presumably a hint for the next free PCID -- confirm against the pmap PCID code */
+#define        PMAP_PCID_MAX_PCID      (0x800)  /* element count of both arrays below */
+       pcid_ref_t              cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];  /* one pcid_ref_t per PCID */
+       pmap_t                  cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];  /* one pmap_t per PCID; NOTE(review): presumably the last pmap run under that PCID -- confirm */
+} pcid_cdata_t;
+
 typedef struct cpu_data
 {
        struct pal_cpu_data     cpu_pal_data;           /* PAL-specific data */
@@ -134,16 +156,17 @@ typedef struct cpu_data
        vm_offset_t             cpu_kernel_stack;       /* kernel stack top */
        vm_offset_t             cpu_int_stack_top;
        int                     cpu_interrupt_level;
-       int                     cpu_phys_number;        /* Physical CPU */
-       cpu_id_t                cpu_id;                 /* Platform Expert */
        volatile int            cpu_signals;            /* IPI events */
        volatile int            cpu_prior_signals;      /* Last set of events,
                                                         * debugging
                                                         */
        ast_t                   cpu_pending_ast;
        volatile int            cpu_running;
+#if !MONOTONIC
        boolean_t               cpu_fixed_pmcs_enabled;
+#endif /* !MONOTONIC */
        rtclock_timer_t         rtclock_timer;
+       uint64_t                quantum_timer_deadline;
        volatile addr64_t       cpu_active_cr3 __attribute((aligned(64)));
        union {
                volatile uint32_t cpu_tlb_invalid;
@@ -155,16 +178,22 @@ typedef struct cpu_data
        volatile task_map_t     cpu_task_map;
        volatile addr64_t       cpu_task_cr3;
        addr64_t                cpu_kernel_cr3;
+       volatile addr64_t       cpu_ucr3;
+       boolean_t               cpu_pagezero_mapped;
        cpu_uber_t              cpu_uber;
-       void                    *cpu_chud;
-       void                    *cpu_console_buf;
-       struct x86_lcpu         lcpu;
+/* Double-mapped per-CPU exception stack address */
+       uintptr_t               cd_estack;
+       int                     cpu_xstate;
+/* Address of shadowed, partially mirrored CPU data structures located
+ * in the double mapped PML4
+ */
+       void                    *cd_shadow;
        struct processor        *cpu_processor;
 #if NCOPY_WINDOWS > 0
        struct cpu_pmap         *cpu_pmap;
 #endif
+       struct real_descriptor  *cpu_ldtp;
        struct cpu_desc_table   *cpu_desc_tablep;
-       struct fake_descriptor  *cpu_ldtp;
        cpu_desc_index_t        cpu_desc_index;
        int                     cpu_ldt;
 #if NCOPY_WINDOWS > 0
@@ -178,7 +207,6 @@ typedef struct cpu_data
 #define HWINTCNT_SIZE 256
        uint32_t                cpu_hwIntCnt[HWINTCNT_SIZE];    /* Interrupt counts */
        uint64_t                cpu_hwIntpexits[HWINTCNT_SIZE];
-       uint64_t                cpu_hwIntcexits[HWINTCNT_SIZE];
        uint64_t                cpu_dr7; /* debug control register */
        uint64_t                cpu_int_event_time;     /* intr entry/exit time */
        pal_rtc_nanotime_t      *cpu_nanotime;          /* Nanotime info */
@@ -189,15 +217,16 @@ typedef struct cpu_data
        uint64_t                *cpu_kpc_shadow;
        uint64_t                *cpu_kpc_reload;
 #endif
+#if MONOTONIC
+       struct mt_cpu cpu_monotonic;
+#endif /* MONOTONIC */
        uint32_t                cpu_pmap_pcid_enabled;
        pcid_t                  cpu_active_pcid;
        pcid_t                  cpu_last_pcid;
+       pcid_t                  cpu_kernel_pcid;
        volatile pcid_ref_t     *cpu_pmap_pcid_coherentp;
        volatile pcid_ref_t     *cpu_pmap_pcid_coherentp_kernel;
-#define        PMAP_PCID_MAX_PCID      (0x1000)
-       pcid_t                  cpu_pcid_free_hint;
-       pcid_ref_t              cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];
-       pmap_t                  cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];
+       pcid_cdata_t            *cpu_pcid_data;
 #ifdef PCID_STATS
        uint64_t                cpu_pmap_pcid_flushes;
        uint64_t                cpu_pmap_pcid_preserves;
@@ -211,11 +240,13 @@ typedef struct cpu_data
        uint64_t                cpu_rtime_total;
        uint64_t                cpu_ixtime;
        uint64_t                cpu_idle_exits;
-       uint64_t                cpu_rtimes[CPU_RTIME_BINS];
-       uint64_t                cpu_itimes[CPU_ITIME_BINS];
-       uint64_t                cpu_cur_insns;
-       uint64_t                cpu_cur_ucc;
-       uint64_t                cpu_cur_urc;
+       uint64_t                cpu_rtimes[CPU_RTIME_BINS];
+       uint64_t                cpu_itimes[CPU_ITIME_BINS];
+#if !MONOTONIC
+       uint64_t                cpu_cur_insns;
+       uint64_t                cpu_cur_ucc;
+       uint64_t                cpu_cur_urc;
+#endif /* !MONOTONIC */
        uint64_t                cpu_gpmcs[4];
        uint64_t                cpu_max_observed_int_latency;
        int                     cpu_max_observed_int_latency_vector;
@@ -243,7 +274,21 @@ typedef struct cpu_data
        int                     cpu_threadtype;
        boolean_t               cpu_iflag;
        boolean_t               cpu_boot_complete;
-       int                     cpu_hibernate;
+       int                     cpu_hibernate;
+#define MAX_PREEMPTION_RECORDS (8)
+#if    DEVELOPMENT || DEBUG
+       int                     cpu_plri;
+       plrecord_t              plrecords[MAX_PREEMPTION_RECORDS];
+#endif
+       void                    *cpu_console_buf;
+       struct x86_lcpu         lcpu;
+       int                     cpu_phys_number;        /* Physical CPU */
+       cpu_id_t                cpu_id;                 /* Platform Expert */
+#if DEBUG
+       uint64_t                cpu_entry_cr3;
+       uint64_t                cpu_exit_cr3;
+       uint64_t                cpu_pcid_last_cr3;
+#endif
 } cpu_data_t;
 
 extern cpu_data_t      *cpu_data_ptr[];  
@@ -351,25 +396,140 @@ get_cpu_phys_number(void)
        CPU_DATA_GET(cpu_phys_number,int)
 }
 
+static inline cpu_data_t *
+current_cpu_datap(void) {  /* Fetches the cpu_this field as a cpu_data_t *; CPU_DATA_GET is defined outside this view and is expected to supply the return */
+       CPU_DATA_GET(cpu_this, cpu_data_t *);
+}
+
+/*
+ * Facility to diagnose preemption-level imbalances, which are otherwise
+ * challenging to debug. On each operation that enables or disables preemption,
+ * we record a backtrace into a per-CPU ring buffer, along with the current
+ * preemption level and operation type. Thus, if an imbalance is observed,
+ * one can examine these per-CPU records to determine which codepath failed
+ * to re-enable preemption, enabled preemption without a corresponding
+ * disablement etc. The backtracer determines which stack is currently active,
+ * and uses that to perform bounds checks on unterminated stacks.
+ * To enable, sysctl -w machdep.pltrace=1 on DEVELOPMENT or DEBUG kernels (DRK '15)
+ * The bounds check currently doesn't account for non-default thread stack sizes.
+ */
+#if DEVELOPMENT || DEBUG
+static inline void pltrace_bt(uint64_t *rets, int maxframes, uint64_t stacklo, uint64_t stackhi) {  /* Walk the saved frame-pointer chain, filling rets[0..maxframes-1]; slots that cannot be traced are set to 0 */
+       uint64_t *cfp = (uint64_t *) __builtin_frame_address(0);  /* start from the frame this code is executing in */
+       int plbtf;
+
+       assert(stacklo !=0  && stackhi !=0);
+
+       for (plbtf = 0; plbtf < maxframes; plbtf++) {
+               if (((uint64_t)cfp == 0) || (((uint64_t)cfp < stacklo) || ((uint64_t)cfp > stackhi))) {  /* chain ended or left [stacklo, stackhi]; NOTE(review): cfp == stackhi passes, so *(cfp + 1) may read just past stackhi -- confirm this is intended */
+                       rets[plbtf] = 0;
+                       continue;  /* cfp is unchanged, so every remaining slot is zeroed as well */
+               }
+               rets[plbtf] = *(cfp + 1);  /* the word after the saved frame pointer; assumes the usual x86_64 frame layout where this is the return address */
+               cfp = (uint64_t *) (*cfp);  /* follow the saved frame pointer to the next frame */
+       }
+}
+
+
+extern uint32_t                low_intstack[];         /* bottom */
+extern uint32_t                low_eintstack[];        /* top */
+extern char            mp_slave_stack[PAGE_SIZE];
+
+static inline void pltrace_internal(boolean_t enable) {  /* Log one preemption enable/disable event, with a backtrace, into this CPU's plrecords[] ring */
+       cpu_data_t *cdata = current_cpu_datap();
+       int cpli = cdata->cpu_preemption_level;
+       int cplrecord = cdata->cpu_plri;  /* ring slot to fill on this call */
+       uint64_t kstackb, kstackt, *plbts;  /* candidate stack bottom / top, and the backtrace destination */
+
+       assert(cpli >= 0);
+
+       cdata->plrecords[cplrecord].pltype = enable;
+       cdata->plrecords[cplrecord].plevel = cpli;
+
+       plbts = &cdata->plrecords[cplrecord].plbt[0];
+
+       cplrecord++;
+
+       if (cplrecord >= MAX_PREEMPTION_RECORDS) {  /* wrap around to the first slot */
+               cplrecord = 0;
+       }
+
+       cdata->cpu_plri = cplrecord;
+       /* Obtain the 'current' program counter, initial backtrace
+        * element. This will also indicate if we were unable to
+        * trace further up the stack for some reason
+        */
+       __asm__ volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
+           : "=m" (plbts[0])
+           :
+           : "rax");
+
+
+       thread_t cplthread = cdata->cpu_active_thread;
+       if (cplthread) {
+               uintptr_t csp;
+               __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
+               /* Determine which stack we're on to populate stack bounds.
+                * We don't need to trace across stack boundaries for this
+                * routine. Candidates are tried in order until csp falls inside:
+                * active stack, kernel stack, interrupt stack, low interrupt stack.
+                */
+               kstackb = cdata->cpu_active_stack;
+               kstackt = kstackb + KERNEL_STACK_SIZE;
+               if (csp < kstackb || csp > kstackt) {
+                       kstackt = cdata->cpu_kernel_stack;
+                       kstackb = kstackt - KERNEL_STACK_SIZE;  /* bottom derived from this candidate's own top, as in the interrupt-stack cases below */
+                       if (csp < kstackb || csp > kstackt) {
+                               kstackt = cdata->cpu_int_stack_top;
+                               kstackb = kstackt - INTSTACK_SIZE;
+                               if (csp < kstackb || csp > kstackt) {
+                                       kstackt = (uintptr_t)low_eintstack;
+                                       kstackb = (uintptr_t)low_eintstack - INTSTACK_SIZE;
+                                       if (csp < kstackb || csp > kstackt) {
+                                               kstackb = (uintptr_t) mp_slave_stack;  /* last candidate, taken unchecked; pltrace_bt() still bounds-checks each frame pointer */
+                                               kstackt = (uintptr_t) mp_slave_stack + PAGE_SIZE;
+                                       }
+                               }
+                       }
+               }
+
+               if (kstackb) {  /* pltrace_bt() asserts non-zero bounds */
+                       pltrace_bt(&plbts[1], MAXPLFRAMES - 1, kstackb, kstackt);
+               }
+       }
+}
+
+extern int plctrace_enabled;
+#endif /* DEVELOPMENT || DEBUG */
+
+static inline void pltrace(boolean_t plenable) {  /* Record a preemption enable/disable event when tracing is switched on; plenable is forwarded as the operation type */
+#if DEVELOPMENT || DEBUG
+       if (__improbable(plctrace_enabled != 0)) {  /* nothing is recorded while plctrace_enabled is zero */
+               pltrace_internal(plenable);
+       }
+#else
+       (void)plenable;  /* tracing is compiled out here; the cast only marks the parameter as used */
+#endif
+}
 
 static inline void
-disable_preemption(void)
-{
+disable_preemption_internal(void) {
+       assert(get_preemption_level() >= 0);
+
 #if defined(__clang__)
        cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
        cpu_data->cpu_preemption_level++;
 #else
        __asm__ volatile ("incl %%gs:%P0"
-                       :
-                       : "i" (offsetof(cpu_data_t, cpu_preemption_level)));
+           :
+           : "i" (offsetof(cpu_data_t, cpu_preemption_level)));
 #endif
+       pltrace(FALSE);
 }
 
 static inline void
-enable_preemption(void)
-{
+enable_preemption_internal(void) {
        assert(get_preemption_level() > 0);
-
+       pltrace(TRUE);
 #if defined(__clang__)
        cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
        if (0 == --cpu_data->cpu_preemption_level)
@@ -390,6 +550,7 @@ enable_preemption_no_check(void)
 {
        assert(get_preemption_level() > 0);
 
+       pltrace(TRUE);
 #if defined(__clang__)
        cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
        cpu_data->cpu_preemption_level--;
@@ -401,36 +562,67 @@ enable_preemption_no_check(void)
 #endif
 }
 
+static inline void
+_enable_preemption_no_check(void) {  /* Underscore-prefixed alias: forwards to enable_preemption_no_check() */
+       enable_preemption_no_check();
+}
+
 static inline void
 mp_disable_preemption(void)
 {
-       disable_preemption();
+       disable_preemption_internal();
 }
 
 static inline void
-mp_enable_preemption(void)
+_mp_disable_preemption(void)
 {
-       enable_preemption();
+       disable_preemption_internal();
 }
 
 static inline void
-mp_enable_preemption_no_check(void)
+mp_enable_preemption(void)
 {
+       enable_preemption_internal();
+}
+
+static inline void
+_mp_enable_preemption(void) {  /* Underscore-prefixed alias: forwards to enable_preemption_internal() */
+       enable_preemption_internal();
+}
+
+static inline void
+mp_enable_preemption_no_check(void) {  /* mp_-prefixed alias: forwards to enable_preemption_no_check() */
        enable_preemption_no_check();
 }
 
-static inline cpu_data_t *
-current_cpu_datap(void)
-{
-       CPU_DATA_GET(cpu_this, cpu_data_t *);
+static inline void
+_mp_enable_preemption_no_check(void) {  /* _mp_-prefixed alias: forwards to enable_preemption_no_check() */
+       enable_preemption_no_check();
 }
 
+#ifdef XNU_KERNEL_PRIVATE
+#define disable_preemption() disable_preemption_internal()
+#define enable_preemption() enable_preemption_internal()
+#define MACHINE_PREEMPTION_MACROS (1)
+#endif
+
 static inline cpu_data_t *
-cpu_datap(int cpu)
-{
+cpu_datap(int cpu) {
        return cpu_data_ptr[cpu];
 }
 
+static inline int
+cpu_is_running(int cpu) {  /* Returns 1 when cpu has a non-NULL cpu_data_t and its cpu_running flag is non-zero, else 0; cpu is not range-checked here */
+       return ((cpu_datap(cpu) != NULL) && (cpu_datap(cpu)->cpu_running));
+}
+
+#ifdef MACH_KERNEL_PRIVATE
+static inline cpu_data_t *
+cpu_shadowp(int cpu) {  /* Returns the cd_shadow pointer of the given CPU; cpu_data_ptr[cpu] is dereferenced without a NULL or range check */
+       return cpu_data_ptr[cpu]->cd_shadow;
+}
+
+#endif
 extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu);
 extern void cpu_data_realloc(void);