diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h
index a479eaea8fe601497a8a8a7a6fe091bda21c66bd..10d2355172831ad24c2f69bf557926bae8b6e2fd 100644
--- a/osfmk/i386/cpu_data.h
+++ b/osfmk/i386/cpu_data.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -125,6 +125,7 @@ typedef struct {
        uint64_t plbt[MAX_TRACE_BTFRAMES];
 } plrecord_t;
 
+#if     DEVELOPMENT || DEBUG
 typedef enum {
        IOTRACE_PHYS_READ = 1,
        IOTRACE_PHYS_WRITE,
@@ -145,7 +146,17 @@ typedef struct {
        uint64_t        backtrace[MAX_TRACE_BTFRAMES];
 } iotrace_entry_t;
 
-#if     DEVELOPMENT || DEBUG
+typedef struct {
+       int             vector;                 /* Vector number of interrupt */
+       thread_t        curthread;              /* Current thread at the time of the interrupt */
+       uint64_t        interrupted_pc;
+       int             curpl;                  /* Current preemption level */
+       int             curil;                  /* Current interrupt level */
+       uint64_t        start_time_abs;
+       uint64_t        duration;
+       uint64_t        backtrace[MAX_TRACE_BTFRAMES];
+} traptrace_entry_t;
+
 #define DEFAULT_IOTRACE_ENTRIES_PER_CPU (64)
 #define IOTRACE_MAX_ENTRIES_PER_CPU (256)
 extern volatile int mmiotrace_enabled;
@@ -154,7 +165,14 @@ extern int iotrace_entries_per_cpu;
 extern int *iotrace_next;
 extern iotrace_entry_t **iotrace_ring;
 
-extern void init_iotrace_bufs(int cpucnt, int entries_per_cpu);
+#define TRAPTRACE_INVALID_INDEX (~0U)
+#define DEFAULT_TRAPTRACE_ENTRIES_PER_CPU (16)
+#define TRAPTRACE_MAX_ENTRIES_PER_CPU (256)
+extern volatile int traptrace_enabled;
+extern int traptrace_generators;
+extern int traptrace_entries_per_cpu;
+extern int *traptrace_next;
+extern traptrace_entry_t **traptrace_ring;
 #endif /* DEVELOPMENT || DEBUG */
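[Annotation: a minimal sketch of how these per-CPU trap-trace rings could be wired up at boot, in the spirit of the removed init_iotrace_bufs() declaration above. The function name and the kalloc()-based allocation are illustrative assumptions, not the kernel's actual init path.]

    /* Hypothetical boot-time setup for the traptrace rings declared above.
     * The name and kalloc() usage are assumptions for illustration only. */
    static void
    traptrace_init_bufs(int cpucnt, int entries_per_cpu)
    {
            int i;

            traptrace_ring = kalloc(cpucnt * sizeof(traptrace_entry_t *));
            traptrace_next = kalloc(cpucnt * sizeof(int));
            for (i = 0; i < cpucnt; i++) {
                    traptrace_ring[i] = kalloc(entries_per_cpu * sizeof(traptrace_entry_t));
                    traptrace_next[i] = 0;
            }
            traptrace_generators = cpucnt;
            traptrace_entries_per_cpu = entries_per_cpu;
    }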
 
 /*
@@ -181,26 +199,32 @@ typedef struct cpu_data {
        struct pal_cpu_data     cpu_pal_data;           /* PAL-specific data */
 #define                         cpu_pd cpu_pal_data     /* convenience alias */
        struct cpu_data         *cpu_this;              /* pointer to myself */
+       vm_offset_t             cpu_pcpu_base;
        thread_t                cpu_active_thread;
        thread_t                cpu_nthread;
-       volatile int            cpu_preemption_level;
        int                     cpu_number;             /* Logical CPU */
        void                    *cpu_int_state;         /* interrupt state */
        vm_offset_t             cpu_active_stack;       /* kernel stack base */
        vm_offset_t             cpu_kernel_stack;       /* kernel stack top */
        vm_offset_t             cpu_int_stack_top;
-       int                     cpu_interrupt_level;
        volatile int            cpu_signals;            /* IPI events */
        volatile int            cpu_prior_signals;      /* Last set of events,
                                                         * debugging
                                                         */
        ast_t                   cpu_pending_ast;
+       /*
+        * Note if rearranging fields:
+        * We want cpu_preemption_level on a different
+        * cache line than cpu_active_thread,
+        * to optimize the mtx_spin phase.
+        */
+       int                     cpu_interrupt_level;
+       volatile int            cpu_preemption_level;
        volatile int            cpu_running;
 #if !MONOTONIC
        boolean_t               cpu_fixed_pmcs_enabled;
 #endif /* !MONOTONIC */
        rtclock_timer_t         rtclock_timer;
-       uint64_t                quantum_timer_deadline;
        volatile addr64_t       cpu_active_cr3 __attribute((aligned(64)));
        union {
                volatile uint32_t cpu_tlb_invalid;
@@ -238,20 +262,10 @@ typedef struct cpu_data {
        uint16_t                cpu_tlb_gen_counts_global[MAX_CPUS];
 
        struct processor        *cpu_processor;
-#if NCOPY_WINDOWS > 0
-       struct cpu_pmap         *cpu_pmap;
-#endif
        struct real_descriptor  *cpu_ldtp;
        struct cpu_desc_table   *cpu_desc_tablep;
        cpu_desc_index_t        cpu_desc_index;
        int                     cpu_ldt;
-#if NCOPY_WINDOWS > 0
-       vm_offset_t             cpu_copywindow_base;
-       uint64_t                *cpu_copywindow_pdp;
-
-       vm_offset_t             cpu_physwindow_base;
-       uint64_t                *cpu_physwindow_ptep;
-#endif
 
 #define HWINTCNT_SIZE 256
        uint32_t                cpu_hwIntCnt[HWINTCNT_SIZE];    /* Interrupt counts */
@@ -289,6 +303,12 @@ typedef struct cpu_data {
        uint64_t                cpu_rtime_total;
        uint64_t                cpu_ixtime;
        uint64_t                cpu_idle_exits;
+       /*
+        * Note that the cacheline-copy mechanism uses the cpu_rtimes field in the shadow CPU
+        * structures to temporarily stash the code cacheline that includes the instruction
+        * pointer at the time of the fault (this field is otherwise unused in the shadow
+        * CPU structures).
+        */
        uint64_t                cpu_rtimes[CPU_RTIME_BINS];
        uint64_t                cpu_itimes[CPU_ITIME_BINS];
 #if !MONOTONIC
@@ -338,76 +358,31 @@ typedef struct cpu_data {
        uint64_t                cpu_pcid_last_cr3;
 #endif
        boolean_t               cpu_rendezvous_in_progress;
+#if CST_DEMOTION_DEBUG
+       /* Count of thread wakeups issued by this processor */
+       uint64_t                cpu_wakeups_issued_total;
+#endif
+#if DEBUG || DEVELOPMENT
+       uint64_t                tsc_sync_delta;
+#endif
 } cpu_data_t;
 
 extern cpu_data_t       *cpu_data_ptr[];
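[Annotation: cpu_data_ptr[] gives by-index access to any CPU's data from any CPU, while the %gs-relative CPU_DATA() accessor defined below only ever names the executing CPU's own structure. A sketch of the distinction; the helper name is illustrative.]

    /* Sketch: cross-CPU access goes through cpu_data_ptr[]; the
     * %gs-relative CPU_DATA() below is valid only for the current CPU. */
    static inline boolean_t
    demo_cpu_is_running(int cpu)
    {
            return cpu_data_ptr[cpu]->cpu_running != 0;
    }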
 
-/* Macro to generate inline bodies to retrieve per-cpu data fields. */
-#if defined(__clang__)
-#define GS_RELATIVE volatile __attribute__((address_space(256)))
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE,MEMBER)
+/*
+ * __SEG_GS marks %gs-relative operations:
+ *   https://clang.llvm.org/docs/LanguageExtensions.html#memory-references-to-specified-segments
+ *   https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces
+ */
+#if defined(__SEG_GS)
+// __seg_gs exists
+#elif defined(__clang__)
+#define __seg_gs __attribute__((address_space(256)))
+#else
+#error use a compiler that supports address spaces or __seg_gs
 #endif
 
-#define CPU_DATA_GET(member, type)                                                                               \
-       cpu_data_t GS_RELATIVE *cpu_data =                                                      \
-               (cpu_data_t GS_RELATIVE *)0UL;                                                                  \
-       type ret;                                                                                                                       \
-       ret = cpu_data->member;                                                                                         \
-       return ret;
-
-#define CPU_DATA_GET_INDEX(member, index, type)                                                   \
-       cpu_data_t GS_RELATIVE *cpu_data =                                                      \
-               (cpu_data_t GS_RELATIVE *)0UL;                                                                  \
-       type ret;                                                                                                                       \
-       ret = cpu_data->member[index];                                                                          \
-       return ret;
-
-#define CPU_DATA_SET(member, value)                                                                              \
-       cpu_data_t GS_RELATIVE *cpu_data =                                                      \
-               (cpu_data_t GS_RELATIVE *)0UL;                                                                  \
-       cpu_data->member = value;
-
-#define CPU_DATA_XCHG(member, value, type)                                                                \
-       cpu_data_t GS_RELATIVE *cpu_data =                                                      \
-               (cpu_data_t GS_RELATIVE *)0UL;                                                                  \
-       type ret;                                                                                                                       \
-       ret = cpu_data->member;                                                                                         \
-       cpu_data->member = value;                                                                                       \
-       return ret;
-
-#else /* !defined(__clang__) */
-
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#endif /* offsetof */
-#define CPU_DATA_GET(member, type)                                       \
-       type ret;                                                       \
-       __asm__ volatile ("mov %%gs:%P1,%0"                             \
-               : "=r" (ret)                                            \
-               : "i" (offsetof(cpu_data_t,member)));                   \
-       return ret;
-
-#define CPU_DATA_GET_INDEX(member, index, type)   \
-       type ret;                                                       \
-       __asm__ volatile ("mov %%gs:(%1),%0"                            \
-               : "=r" (ret)                                            \
-               : "r" (offsetof(cpu_data_t,member[index])));                    \
-       return ret;
-
-#define CPU_DATA_SET(member, value)                                      \
-       __asm__ volatile ("mov %0,%%gs:%P1"                             \
-               :                                                       \
-               : "r" (value), "i" (offsetof(cpu_data_t,member)));
-
-#define CPU_DATA_XCHG(member, value, type)                                \
-       type ret;                                                       \
-       __asm__ volatile ("xchg %0,%%gs:%P1"                            \
-               : "=r" (ret)                                            \
-               : "i" (offsetof(cpu_data_t,member)), "0" (value));      \
-       return ret;
-
-#endif /* !defined(__clang__) */
+#define CPU_DATA()            ((cpu_data_t __seg_gs *)0UL)
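[Annotation: with CPU_DATA() materializing a %gs-relative cpu_data_t pointer based at 0, every accessor below reduces to a single %gs-prefixed load or store at the field's compile-time offset, with no per-access base-pointer computation. A minimal sketch of what the compiler emits, assuming clang -O2 on x86_64; the struct and field names are illustrative.]

    /* Illustrative only: what a CPU_DATA()-style access compiles to. */
    struct percpu_demo { int level; };

    static inline int
    demo_get_level(void)
    {
            /* clang emits: movl %gs:0, %eax  (offsetof(level) == 0) */
            return ((struct percpu_demo __seg_gs *)0UL)->level;
    }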
 
 /*
  * Everyone within the osfmk part of the kernel can use the fast
@@ -434,13 +409,13 @@ extern cpu_data_t       *cpu_data_ptr[];
 static inline thread_t
 get_active_thread_volatile(void)
 {
-       CPU_DATA_GET(cpu_active_thread, thread_t)
+       return CPU_DATA()->cpu_active_thread;
 }
 
 static inline __attribute__((const)) thread_t
 get_active_thread(void)
 {
-       CPU_DATA_GET(cpu_active_thread, thread_t)
+       return CPU_DATA()->cpu_active_thread;
 }
 
 #define current_thread_fast()           get_active_thread()
@@ -452,28 +427,33 @@ get_active_thread(void)
 static inline int
 get_preemption_level(void)
 {
-       CPU_DATA_GET(cpu_preemption_level, int)
+       return CPU_DATA()->cpu_preemption_level;
 }
 static inline int
 get_interrupt_level(void)
 {
-       CPU_DATA_GET(cpu_interrupt_level, int)
+       return CPU_DATA()->cpu_interrupt_level;
 }
 static inline int
 get_cpu_number(void)
 {
-       CPU_DATA_GET(cpu_number, int)
+       return CPU_DATA()->cpu_number;
+}
+static inline vm_offset_t
+get_current_percpu_base(void)
+{
+       return CPU_DATA()->cpu_pcpu_base;
 }
 static inline int
 get_cpu_phys_number(void)
 {
-       CPU_DATA_GET(cpu_phys_number, int)
+       return CPU_DATA()->cpu_phys_number;
 }
 
 static inline cpu_data_t *
 current_cpu_datap(void)
 {
-       CPU_DATA_GET(cpu_this, cpu_data_t *);
+       return CPU_DATA()->cpu_this;
 }
 
 /*
@@ -490,11 +470,12 @@ current_cpu_datap(void)
  */
 #if DEVELOPMENT || DEBUG
 static inline void
-rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata)
+rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata, uint64_t frameptr, bool use_cursp)
 {
        extern uint32_t         low_intstack[];         /* bottom */
        extern uint32_t         low_eintstack[];        /* top */
        extern char             mp_slave_stack[];
+       int                     btidx = 0;
 
        uint64_t kstackb, kstackt;
 
@@ -502,16 +483,21 @@ rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata)
         * element. This will also indicate if we were unable to
         * trace further up the stack for some reason
         */
-       __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
-             : "=m" (rets[0])
-             :
-             : "rax");
-
+       if (use_cursp) {
+               __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
+                     : "=m" (rets[btidx++])
+                     :
+                     : "rax");
+       }
 
        thread_t cplthread = cdata->cpu_active_thread;
        if (cplthread) {
                uintptr_t csp;
-               __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
+               if (use_cursp) {
+                       __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
+               } else {
+                       csp = frameptr;
+               }
                /* Determine which stack we're on to populate stack bounds.
                 * We don't need to trace across stack boundaries for this
                 * routine.
@@ -539,10 +525,10 @@ rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata)
                }
 
                if (__probable(kstackb && kstackt)) {
-                       uint64_t *cfp = (uint64_t *) __builtin_frame_address(0);
+                       uint64_t *cfp = (uint64_t *) frameptr;
                        int rbbtf;
 
-                       for (rbbtf = 1; rbbtf < maxframes; rbbtf++) {
+                       for (rbbtf = btidx; rbbtf < maxframes; rbbtf++) {
                                if (((uint64_t)cfp == 0) || (((uint64_t)cfp < kstackb) || ((uint64_t)cfp > kstackt))) {
                                        rets[rbbtf] = 0;
                                        continue;
@@ -554,6 +540,7 @@ rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata)
        }
 }
 
+__attribute__((noinline))
 static inline void
 pltrace_internal(boolean_t enable)
 {
@@ -577,7 +564,7 @@ pltrace_internal(boolean_t enable)
 
        cdata->cpu_plri = cplrecord;
 
-       rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata);
+       rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), false);
 }
 
 extern int plctrace_enabled;
@@ -610,11 +597,58 @@ iotrace(iotrace_type_e type, uint64_t vaddr, uint64_t paddr, int size, uint64_t
        iotrace_next[cpu_num] = ((nextidx + 1) >= iotrace_entries_per_cpu) ? 0 : (nextidx + 1);
 
        rbtrace_bt(&cur_iotrace_ring[nextidx].backtrace[0],
-           MAX_TRACE_BTFRAMES - 1, cdata);
+           MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
+}
+
+static inline uint32_t
+traptrace_start(int vecnum, uint64_t ipc, uint64_t sabs, uint64_t frameptr)
+{
+       cpu_data_t *cdata;
+       unsigned int cpu_num, nextidx;
+       traptrace_entry_t *cur_traptrace_ring;
+
+       if (__improbable(traptrace_enabled == 0 || traptrace_generators == 0)) {
+               return TRAPTRACE_INVALID_INDEX;
+       }
+
+       assert(ml_get_interrupts_enabled() == FALSE);
+       cdata = current_cpu_datap();
+       cpu_num = (unsigned int)cdata->cpu_number;
+       nextidx = (unsigned int)traptrace_next[cpu_num];
+       /* prevent nested interrupts from clobbering this record */
+       traptrace_next[cpu_num] = (int)(((nextidx + 1) >= (unsigned int)traptrace_entries_per_cpu) ? 0 : (nextidx + 1));
+
+       cur_traptrace_ring = traptrace_ring[cpu_num];
+
+       cur_traptrace_ring[nextidx].vector = vecnum;
+       cur_traptrace_ring[nextidx].curthread = current_thread();
+       cur_traptrace_ring[nextidx].interrupted_pc = ipc;
+       cur_traptrace_ring[nextidx].curpl = cdata->cpu_preemption_level;
+       cur_traptrace_ring[nextidx].curil = cdata->cpu_interrupt_level;
+       cur_traptrace_ring[nextidx].start_time_abs = sabs;
+       cur_traptrace_ring[nextidx].duration = ~0ULL;
+
+       rbtrace_bt(&cur_traptrace_ring[nextidx].backtrace[0],
+           MAX_TRACE_BTFRAMES - 1, cdata, frameptr, false);
+
+       assert(nextidx <= 0xFFFF);
+
+       return (uint32_t)((cpu_num << 16) | nextidx);
 }
-#endif /* DEVELOPMENT || DEBUG */
 
 static inline void
+traptrace_end(uint32_t index, uint64_t eabs)
+{
+       if (index != TRAPTRACE_INVALID_INDEX) {
+               traptrace_entry_t *ttentp = &traptrace_ring[index >> 16][index & 0xFFFF];
+
+               ttentp->duration = eabs - ttentp->start_time_abs;
+       }
+}
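[Annotation: a hedged sketch of the expected call pattern. A trap path brackets its dispatch with these helpers, carrying the packed (cpu_num << 16 | nextidx) cookie from start to end; the handler name is illustrative, not the kernel's actual trap entry.]

    /* Illustrative caller only; interrupts are assumed already disabled,
     * per the assert in traptrace_start(). */
    static void
    demo_handle_vector(int vector, uint64_t interrupted_pc)
    {
            uint32_t ttidx = traptrace_start(vector, interrupted_pc,
                mach_absolute_time(), (uint64_t)__builtin_frame_address(0));

            /* ... dispatch the trap or interrupt ... */

            traptrace_end(ttidx, mach_absolute_time());
    }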
+
+#endif /* DEVELOPMENT || DEBUG */
+
+__header_always_inline void
 pltrace(boolean_t plenable)
 {
 #if DEVELOPMENT || DEBUG
@@ -631,16 +665,9 @@ disable_preemption_internal(void)
 {
        assert(get_preemption_level() >= 0);
 
-       os_compiler_barrier(release);
-#if defined(__clang__)
-       cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
-       cpu_data->cpu_preemption_level++;
-#else
-       __asm__ volatile ("incl %%gs:%P0"
-            :
-            : "i" (offsetof(cpu_data_t, cpu_preemption_level)));
-#endif
-       os_compiler_barrier(acquire);
+       os_compiler_barrier();
+       CPU_DATA()->cpu_preemption_level++;
+       os_compiler_barrier();
        pltrace(FALSE);
 }
 
@@ -649,22 +676,11 @@ enable_preemption_internal(void)
 {
        assert(get_preemption_level() > 0);
        pltrace(TRUE);
-       os_compiler_barrier(release);
-#if defined(__clang__)
-       cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
-       if (0 == --cpu_data->cpu_preemption_level) {
+       os_compiler_barrier();
+       if (0 == --CPU_DATA()->cpu_preemption_level) {
                kernel_preempt_check();
        }
-#else
-       __asm__ volatile ("decl %%gs:%P0                \n\t"
-                          "jne 1f                      \n\t"
-                          "call _kernel_preempt_check  \n\t"
-                          "1:"
-                        : /* no outputs */
-                        : "i" (offsetof(cpu_data_t, cpu_preemption_level))
-                        : "eax", "ecx", "edx", "cc", "memory");
-#endif
-       os_compiler_barrier(acquire);
+       os_compiler_barrier();
 }
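[Annotation: the two compiler barriers fence the level change against code motion in both directions, so nothing from the critical section can be hoisted above the increment or sunk below the decrement. A sketch of the usual pairing, assuming the public disable_preemption()/enable_preemption() wrappers over these *_internal routines.]

    /* Sketch of typical usage; the wrapper names are the public API,
     * the per-CPU field touched here is just an example. */
    static void
    demo_percpu_update(void)
    {
            disable_preemption();
            CPU_DATA()->cpu_idle_exits++;   /* per-CPU state, no migration */
            enable_preemption();            /* kernel_preempt_check() at level 0 */
    }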
 
 static inline void
@@ -673,17 +689,9 @@ enable_preemption_no_check(void)
        assert(get_preemption_level() > 0);
 
        pltrace(TRUE);
-       os_compiler_barrier(release);
-#if defined(__clang__)
-       cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
-       cpu_data->cpu_preemption_level--;
-#else
-       __asm__ volatile ("decl %%gs:%P0"
-                        : /* no outputs */
-                        : "i" (offsetof(cpu_data_t, cpu_preemption_level))
-                        : "cc", "memory");
-#endif
-       os_compiler_barrier(acquire);
+       os_compiler_barrier();
+       CPU_DATA()->cpu_preemption_level--;
+       os_compiler_barrier();
 }
 
 static inline void