X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c910b4d9d2451126ae3917b931cd4390c11e1d52..d9a64523371fa019c4575bb400cbbc3a50ac9903:/osfmk/i386/cpu_data.h

diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h
index e41f6b8cd..4201068f4 100644
--- a/osfmk/i386/cpu_data.h
+++ b/osfmk/i386/cpu_data.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -35,19 +35,29 @@
 #include
-#if defined(__GNUC__)
-
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include
+#include
+#include
+#include
 #include
 #include
+#include
+#if CONFIG_VMX
 #include
+#endif
+
+#if MONOTONIC
+#include
+#endif /* MONOTONIC */
+
+#include
 
 /*
  * Data structures referenced (anonymously) from per-cpu data:
@@ -55,36 +65,38 @@
 struct cpu_cons_buffer;
 struct cpu_desc_table;
 struct mca_state;
-
+struct prngContext;
 
 /*
  * Data structures embedded in per-cpu data:
 */
 
 typedef struct rtclock_timer {
-	queue_head_t		queue;
+	mpqueue_head_t		queue;
 	uint64_t		deadline;
-	boolean_t		is_set;
+	uint64_t		when_set;
 	boolean_t		has_expired;
 } rtclock_timer_t;
 
-
 typedef struct {
-	struct i386_tss		*cdi_ktss;
-#if MACH_KDB
-	struct i386_tss		*cdi_dbtss;
-#endif /* MACH_KDB */
-	struct fake_descriptor	*cdi_gdt;
-	struct fake_descriptor	*cdi_idt;
-	struct fake_descriptor	*cdi_ldt;
-	vm_offset_t		cdi_sstk;
+	/* The 'u' suffixed fields store the double-mapped descriptor addresses */
+	struct x86_64_tss	*cdi_ktssu;
+	struct x86_64_tss	*cdi_ktssb;
+	x86_64_desc_register_t	cdi_gdtu;
+	x86_64_desc_register_t	cdi_gdtb;
+	x86_64_desc_register_t	cdi_idtu;
+	x86_64_desc_register_t	cdi_idtb;
+	struct fake_descriptor	*cdi_ldtu;
+	struct fake_descriptor	*cdi_ldtb;
+	vm_offset_t		cdi_sstku;
+	vm_offset_t		cdi_sstkb;
 } cpu_desc_index_t;
 
 typedef enum {
-	TASK_MAP_32BIT,			/* 32-bit, compatibility mode */
-	TASK_MAP_64BIT,			/* 64-bit, separate address space */
-	TASK_MAP_64BIT_SHARED		/* 64-bit, kernel-shared addr space */
+	TASK_MAP_32BIT,			/* 32-bit user, compatibility mode */
+	TASK_MAP_64BIT,			/* 64-bit user thread, shared space */
 } task_map_t;
 
+
 /*
  * This structure is used on entry into the (uber-)kernel on syscall from
  * a 64-bit user. It contains the address of the machine state save area
@@ -94,9 +106,22 @@ typedef enum {
 typedef struct {
 	addr64_t	cu_isf;		/* thread->pcb->iss.isf */
 	uint64_t	cu_tmp;		/* temporary scratch */
-	addr64_t	cu_user_gs_base;
+	addr64_t	cu_user_gs_base;
 } cpu_uber_t;
 
+typedef uint16_t	pcid_t;
+typedef uint8_t		pcid_ref_t;
+
+#define CPU_RTIME_BINS (12)
+#define CPU_ITIME_BINS (CPU_RTIME_BINS)
+
+#define MAXPLFRAMES (16)
+typedef struct {
+	boolean_t pltype;
+	int plevel;
+	uint64_t plbt[MAXPLFRAMES];
+} plrecord_t;
+
 /*
  * Per-cpu data.
  *
@@ -110,114 +135,270 @@ typedef struct {
  * cpu_datap(cpu_number) macro which uses the cpu_data_ptr[] array of per-cpu
  * pointers.
 */
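For illustration, here is a minimal sketch (not part of the patch) of the access pattern the comment above describes: a CPU reads its own data through current_cpu_datap(), which resolves to a %gs-relative load, while data belonging to another CPU goes through cpu_datap(), which indexes the cpu_data_ptr[] array. Both accessors are defined later in this header; the function name below is hypothetical.

static inline uint64_t
intr_entry_time_example(int cpu)
{
	/* own CPU: %gs-relative access, no array lookup */
	uint64_t mine = current_cpu_datap()->cpu_int_event_time;

	/* another CPU: indexed through the cpu_data_ptr[] array */
	cpu_data_t *cdp = cpu_datap(cpu);
	return (cdp != NULL) ? cdp->cpu_int_event_time : mine;
}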
+typedef struct {
+	pcid_t			cpu_pcid_free_hint;
+#define	PMAP_PCID_MAX_PCID	(0x800)
+	pcid_ref_t		cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];
+	pmap_t			cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];
+} pcid_cdata_t;
+
 typedef struct cpu_data {
+	struct pal_cpu_data	cpu_pal_data;		/* PAL-specific data */
+#define	cpu_pd cpu_pal_data	/* convenience alias */
 	struct cpu_data		*cpu_this;		/* pointer to myself */
 	thread_t		cpu_active_thread;
+	thread_t		cpu_nthread;
+	volatile int		cpu_preemption_level;
+	int			cpu_number;		/* Logical CPU */
 	void			*cpu_int_state;		/* interrupt state */
 	vm_offset_t		cpu_active_stack;	/* kernel stack base */
 	vm_offset_t		cpu_kernel_stack;	/* kernel stack top */
 	vm_offset_t		cpu_int_stack_top;
-	int			cpu_preemption_level;
-	int			cpu_simple_lock_count;
 	int			cpu_interrupt_level;
-	int			cpu_number;		/* Logical CPU */
-	int			cpu_phys_number;	/* Physical CPU */
-	cpu_id_t		cpu_id;			/* Platform Expert */
-	int			cpu_signals;		/* IPI events */
-	int			cpu_mcount_off;		/* mcount recursion */
+	volatile int		cpu_signals;		/* IPI events */
+	volatile int		cpu_prior_signals;	/* Last set of events,
+							 * debugging
+							 */
 	ast_t			cpu_pending_ast;
-	int			cpu_type;
-	int			cpu_subtype;
-	int			cpu_threadtype;
-	int			cpu_running;
+	volatile int		cpu_running;
+#if !MONOTONIC
+	boolean_t		cpu_fixed_pmcs_enabled;
+#endif /* !MONOTONIC */
 	rtclock_timer_t		rtclock_timer;
-	boolean_t		cpu_is64bit;
-	task_map_t		cpu_task_map;
-	addr64_t		cpu_task_cr3;
-	addr64_t		cpu_active_cr3;
+	uint64_t		quantum_timer_deadline;
+	volatile addr64_t	cpu_active_cr3 __attribute((aligned(64)));
+	union {
+		volatile uint32_t cpu_tlb_invalid;
+		struct {
+			volatile uint16_t cpu_tlb_invalid_local;
+			volatile uint16_t cpu_tlb_invalid_global;
+		};
+	};
+	volatile task_map_t	cpu_task_map;
+	volatile addr64_t	cpu_task_cr3;
 	addr64_t		cpu_kernel_cr3;
+	volatile addr64_t	cpu_ucr3;
+	boolean_t		cpu_pagezero_mapped;
 	cpu_uber_t		cpu_uber;
-	void			*cpu_chud;
-	void			*cpu_console_buf;
-	struct x86_lcpu		lcpu;
+/* Double-mapped per-CPU exception stack address */
+	uintptr_t		cd_estack;
+	int			cpu_xstate;
+/* Address of shadowed, partially mirrored CPU data structures located
+ * in the double mapped PML4
+ */
+	void			*cd_shadow;
 	struct processor	*cpu_processor;
+#if NCOPY_WINDOWS > 0
 	struct cpu_pmap		*cpu_pmap;
+#endif
+	struct real_descriptor	*cpu_ldtp;
 	struct cpu_desc_table	*cpu_desc_tablep;
-	struct fake_descriptor	*cpu_ldtp;
 	cpu_desc_index_t	cpu_desc_index;
 	int			cpu_ldt;
-#ifdef MACH_KDB
-	/* XXX Untested: */
-	int			cpu_db_pass_thru;
-	vm_offset_t		cpu_db_stacks;
-	void			*cpu_kdb_saved_state;
-	spl_t			cpu_kdb_saved_ipl;
-	int			cpu_kdb_is_slave;
-	int			cpu_kdb_active;
-#endif /* MACH_KDB */
-	boolean_t		cpu_iflag;
-	boolean_t		cpu_boot_complete;
-	int			cpu_hibernate;
-
+#if NCOPY_WINDOWS > 0
 	vm_offset_t		cpu_copywindow_base;
 	uint64_t		*cpu_copywindow_pdp;
 	vm_offset_t		cpu_physwindow_base;
 	uint64_t		*cpu_physwindow_ptep;
-	void			*cpu_hi_iss;
-	boolean_t		cpu_tlb_invalid;
-	uint32_t		cpu_hwIntCnt[256];	/* Interrupt counts */
+#endif
+
+#define HWINTCNT_SIZE 256
+	uint32_t		cpu_hwIntCnt[HWINTCNT_SIZE];	/* Interrupt counts */
+	uint64_t		cpu_hwIntpexits[HWINTCNT_SIZE];
 	uint64_t		cpu_dr7; /* debug control register */
 	uint64_t		cpu_int_event_time;	/* intr entry/exit time */
+	pal_rtc_nanotime_t	*cpu_nanotime;		/* Nanotime info */
+#if KPC
+	/* double-buffered performance counter data */
+	uint64_t		*cpu_kpc_buf[2];
+	/* PMC shadow and reload value buffers */
+	uint64_t		*cpu_kpc_shadow;
+	uint64_t		*cpu_kpc_reload;
+#endif
+#if MONOTONIC
+	struct mt_cpu		cpu_monotonic;
+#endif /* MONOTONIC */
+	uint32_t		cpu_pmap_pcid_enabled;
+	pcid_t			cpu_active_pcid;
+	pcid_t			cpu_last_pcid;
+	pcid_t			cpu_kernel_pcid;
+	volatile pcid_ref_t	*cpu_pmap_pcid_coherentp;
+	volatile pcid_ref_t	*cpu_pmap_pcid_coherentp_kernel;
+	pcid_cdata_t		*cpu_pcid_data;
+#ifdef PCID_STATS
+	uint64_t		cpu_pmap_pcid_flushes;
+	uint64_t		cpu_pmap_pcid_preserves;
+#endif
+	uint64_t		cpu_aperf;
+	uint64_t		cpu_mperf;
+	uint64_t		cpu_c3res;
+	uint64_t		cpu_c6res;
+	uint64_t		cpu_c7res;
+	uint64_t		cpu_itime_total;
+	uint64_t		cpu_rtime_total;
+	uint64_t		cpu_ixtime;
+	uint64_t		cpu_idle_exits;
+	uint64_t		cpu_rtimes[CPU_RTIME_BINS];
+	uint64_t		cpu_itimes[CPU_ITIME_BINS];
+#if !MONOTONIC
+	uint64_t		cpu_cur_insns;
+	uint64_t		cpu_cur_ucc;
+	uint64_t		cpu_cur_urc;
+#endif /* !MONOTONIC */
+	uint64_t		cpu_gpmcs[4];
+	uint64_t		cpu_max_observed_int_latency;
+	int			cpu_max_observed_int_latency_vector;
+	volatile boolean_t	cpu_NMI_acknowledged;
+	uint64_t		debugger_entry_time;
+	uint64_t		debugger_ipi_time;
+	/* A separate nested interrupt stack flag, to account
+	 * for non-nested interrupts arriving while on the interrupt stack
+	 * Currently only occurs when AICPM enables interrupts on the
+	 * interrupt stack during processor offlining.
+	 */
+	uint32_t		cpu_nested_istack;
+	uint32_t		cpu_nested_istack_events;
+	x86_saved_state64_t	*cpu_fatal_trap_state;
+	x86_saved_state64_t	*cpu_post_fatal_trap_state;
+#if CONFIG_VMX
 	vmx_cpu_t		cpu_vmx;		/* wonderful world of virtualization */
+#endif
+#if CONFIG_MCA
 	struct mca_state	*cpu_mca_state;		/* State at MC fault */
-	uint64_t		cpu_uber_arg_store;	/* Double mapped address
-							 * of current thread's
-							 * uu_arg array.
-							 */
-	uint64_t		cpu_uber_arg_store_valid; /* Double mapped
-							   * address of pcb
-							   * arg store
-							   * validity flag.
-							   */
-	rtc_nanotime_t		*cpu_nanotime;		/* Nanotime info */
-
+#endif
+	int			cpu_type;
+	int			cpu_subtype;
+	int			cpu_threadtype;
+	boolean_t		cpu_iflag;
+	boolean_t		cpu_boot_complete;
+	int			cpu_hibernate;
+#define MAX_PREEMPTION_RECORDS (8)
+#if DEVELOPMENT || DEBUG
+	int			cpu_plri;
+	plrecord_t		plrecords[MAX_PREEMPTION_RECORDS];
+#endif
+	void			*cpu_console_buf;
+	struct x86_lcpu		lcpu;
+	int			cpu_phys_number;	/* Physical CPU */
+	cpu_id_t		cpu_id;			/* Platform Expert */
+#if DEBUG
+	uint64_t		cpu_entry_cr3;
+	uint64_t		cpu_exit_cr3;
+	uint64_t		cpu_pcid_last_cr3;
+#endif
+	boolean_t		cpu_rendezvous_in_progress;
 } cpu_data_t;
 
 extern cpu_data_t	*cpu_data_ptr[];
-extern cpu_data_t	cpu_data_master;
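One detail worth calling out from the structure above: cpu_tlb_invalid now overlays two 16-bit flags inside a single 32-bit word, so the local and global pending-invalidation bits can be set independently but cleared together with one store. A minimal sketch of that usage follows (not part of the patch; the function name is hypothetical, the field names are the ones declared above).

static inline void
tlb_flush_mark_example(cpu_data_t *cdp, boolean_t global)
{
	if (global)
		cdp->cpu_tlb_invalid_global = 1;	/* global shootdown pending */
	else
		cdp->cpu_tlb_invalid_local = 1;		/* local shootdown pending */

	/* ...later, once this CPU has actually flushed its TLB: */
	cdp->cpu_tlb_invalid = 0;			/* clears both 16-bit flags in one store */
}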
 
 /* Macro to generate inline bodies to retrieve per-cpu data fields.
 */
+#if defined(__clang__)
+#define GS_RELATIVE volatile __attribute__((address_space(256)))
+#ifndef offsetof
+#define offsetof(TYPE,MEMBER) __builtin_offsetof(TYPE,MEMBER)
+#endif
+
+#define CPU_DATA_GET(member,type)					\
+	cpu_data_t GS_RELATIVE *cpu_data =				\
+		(cpu_data_t GS_RELATIVE *)0UL;				\
+	type ret;							\
+	ret = cpu_data->member;						\
+	return ret;
+
+#define CPU_DATA_GET_INDEX(member,index,type)				\
+	cpu_data_t GS_RELATIVE *cpu_data =				\
+		(cpu_data_t GS_RELATIVE *)0UL;				\
+	type ret;							\
+	ret = cpu_data->member[index];					\
+	return ret;
+
+#define CPU_DATA_SET(member,value)					\
+	cpu_data_t GS_RELATIVE *cpu_data =				\
+		(cpu_data_t GS_RELATIVE *)0UL;				\
+	cpu_data->member = value;
+
+#define CPU_DATA_XCHG(member,value,type)				\
+	cpu_data_t GS_RELATIVE *cpu_data =				\
+		(cpu_data_t GS_RELATIVE *)0UL;				\
+	type ret;							\
+	ret = cpu_data->member;						\
+	cpu_data->member = value;					\
+	return ret;
+
+#else /* !defined(__clang__) */
+
 #ifndef offsetof
 #define offsetof(TYPE,MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif /* offsetof */
 #define CPU_DATA_GET(member,type)					\
 	type ret;							\
-	__asm__ volatile ("movl %%gs:%P1,%0"				\
+	__asm__ volatile ("mov %%gs:%P1,%0"				\
 		: "=r" (ret)						\
 		: "i" (offsetof(cpu_data_t,member)));			\
 	return ret;
+#define CPU_DATA_GET_INDEX(member,index,type)				\
+	type ret;							\
+	__asm__ volatile ("mov %%gs:(%1),%0"				\
+		: "=r" (ret)						\
+		: "r" (offsetof(cpu_data_t,member[index])));		\
+	return ret;
+
+#define CPU_DATA_SET(member,value)					\
+	__asm__ volatile ("mov %0,%%gs:%P1"				\
+		:							\
+		: "r" (value), "i" (offsetof(cpu_data_t,member)));
+
+#define CPU_DATA_XCHG(member,value,type)				\
+	type ret;							\
+	__asm__ volatile ("xchg %0,%%gs:%P1"				\
+		: "=r" (ret)						\
+		: "i" (offsetof(cpu_data_t,member)), "0" (value));	\
+	return ret;
+
+#endif /* !defined(__clang__) */
+
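To make the mechanism above concrete, here is a sketch (not part of the patch) of an accessor built from these macros. On the GCC path the macro body expands to a single %gs-relative load of the field at the given offset; on the clang path it becomes an ordinary load through a pointer in address space 256, which the compiler lowers to the same %gs-relative access. The function name is hypothetical; the real accessors (get_cpu_number(), get_preemption_level(), and so on) follow below.

static inline int
get_interrupt_level_example(void)
{
	/* expands to a single load from %gs:offsetof(cpu_data_t, cpu_interrupt_level) */
	CPU_DATA_GET(cpu_interrupt_level,int)
}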
 /*
  * Everyone within the osfmk part of the kernel can use the fast
  * inline versions of these routines. Everyone outside must call
  * the real thing.
  */
+
+
+/*
+ * The "volatile" flavor of current_thread() is intended for use by
+ * scheduler code which may need to update the thread pointer in the
+ * course of a context switch. Any call to current_thread() made
+ * prior to the thread pointer update should be safe to optimize away
+ * as it should be consistent with that thread's state to the extent
+ * the compiler can reason about it. Likewise, the context switch
+ * path will eventually result in an arbitrary branch to the new
+ * thread's pc, about which the compiler won't be able to reason.
+ * Thus any compile-time optimization of current_thread() calls made
+ * within the new thread should be safely encapsulated in its
+ * register/stack state. The volatile form therefore exists to cover
+ * the window between the thread pointer update and the branch to
+ * the new pc.
+ */
 static inline thread_t
+get_active_thread_volatile(void)
+{
+	CPU_DATA_GET(cpu_active_thread,thread_t)
+}
+
+static inline __pure2 thread_t
 get_active_thread(void)
 {
 	CPU_DATA_GET(cpu_active_thread,thread_t)
 }
+
 #define current_thread_fast()		get_active_thread()
+#define current_thread_volatile()	get_active_thread_volatile()
 #define current_thread()		current_thread_fast()
 
-static inline boolean_t
-get_is64bit(void)
-{
-	CPU_DATA_GET(cpu_is64bit, boolean_t)
-}
-#define cpu_mode_is64bit()		get_is64bit()
+#define cpu_mode_is64bit()		TRUE
 
 static inline int
 get_preemption_level(void)
@@ -225,11 +406,6 @@ get_preemption_level(void)
 	CPU_DATA_GET(cpu_preemption_level,int)
 }
 static inline int
-get_simple_lock_count(void)
-{
-	CPU_DATA_GET(cpu_simple_lock_count,int)
-}
-static inline int
 get_interrupt_level(void)
 {
 	CPU_DATA_GET(cpu_interrupt_level,int)
@@ -245,19 +421,145 @@ get_cpu_phys_number(void)
 	CPU_DATA_GET(cpu_phys_number,int)
 }
 
+static inline cpu_data_t *
+current_cpu_datap(void) {
+	CPU_DATA_GET(cpu_this, cpu_data_t *);
+}
+
+/*
+ * Facility to diagnose preemption-level imbalances, which are otherwise
+ * challenging to debug. On each operation that enables or disables preemption,
+ * we record a backtrace into a per-CPU ring buffer, along with the current
+ * preemption level and operation type. Thus, if an imbalance is observed,
+ * one can examine these per-CPU records to determine which codepath failed
+ * to re-enable preemption, enabled preemption without a corresponding
+ * disablement, etc. The backtracer determines which stack is currently active,
+ * and uses that to perform bounds checks on unterminated stacks.
+ * To enable, sysctl -w machdep.pltrace=1 on DEVELOPMENT or DEBUG kernels (DRK '15).
+ * The bounds check currently doesn't account for non-default thread stack sizes.
+ */
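As a usage illustration of the invariant this facility checks (not part of the patch): every disable_preemption() must be matched by exactly one enable_preemption(), so the value reported by get_preemption_level() is unchanged across the pair; an unmatched disable is exactly what the per-CPU plrecords ring buffer is meant to pinpoint. The function name below is hypothetical.

static inline void
preemption_balance_example(void)
{
	int entry_level = get_preemption_level();

	disable_preemption();	/* records a pltrace entry on DEVELOPMENT/DEBUG kernels */
	/* ... work that must stay on this CPU, e.g. touching current_cpu_datap() ... */
	enable_preemption();	/* records the matching pltrace entry */

	assert(get_preemption_level() == entry_level);
}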
+#if DEVELOPMENT || DEBUG
+static inline void pltrace_bt(uint64_t *rets, int maxframes, uint64_t stacklo, uint64_t stackhi) {
+	uint64_t *cfp = (uint64_t *) __builtin_frame_address(0);
+	int plbtf;
+
+	assert(stacklo != 0 && stackhi != 0);
+
+	for (plbtf = 0; plbtf < maxframes; plbtf++) {
+		if (((uint64_t)cfp == 0) || (((uint64_t)cfp < stacklo) || ((uint64_t)cfp > stackhi))) {
+			rets[plbtf] = 0;
+			continue;
+		}
+		rets[plbtf] = *(cfp + 1);
+		cfp = (uint64_t *) (*cfp);
+	}
+}
+
+
+extern uint32_t low_intstack[];		/* bottom */
+extern uint32_t low_eintstack[];	/* top */
+extern char mp_slave_stack[PAGE_SIZE];
+
+static inline void pltrace_internal(boolean_t enable) {
+	cpu_data_t *cdata = current_cpu_datap();
+	int cpli = cdata->cpu_preemption_level;
+	int cplrecord = cdata->cpu_plri;
+	uint64_t kstackb, kstackt, *plbts;
+
+	assert(cpli >= 0);
+
+	cdata->plrecords[cplrecord].pltype = enable;
+	cdata->plrecords[cplrecord].plevel = cpli;
+
+	plbts = &cdata->plrecords[cplrecord].plbt[0];
+
+	cplrecord++;
+
+	if (cplrecord >= MAX_PREEMPTION_RECORDS) {
+		cplrecord = 0;
+	}
+
+	cdata->cpu_plri = cplrecord;
+	/* Obtain the 'current' program counter, initial backtrace
+	 * element. This will also indicate if we were unable to
+	 * trace further up the stack for some reason
+	 */
+	__asm__ volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
+	    : "=m" (plbts[0])
+	    :
+	    : "rax");
+
+
+	thread_t cplthread = cdata->cpu_active_thread;
+	if (cplthread) {
+		uintptr_t csp;
+		__asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
+		/* Determine which stack we're on to populate stack bounds.
+		 * We don't need to trace across stack boundaries for this
+		 * routine.
+		 */
+		kstackb = cdata->cpu_active_stack;
+		kstackt = kstackb + KERNEL_STACK_SIZE;
+		if (csp < kstackb || csp > kstackt) {
+			kstackt = cdata->cpu_kernel_stack;
+			kstackb = kstackb - KERNEL_STACK_SIZE;
+			if (csp < kstackb || csp > kstackt) {
+				kstackt = cdata->cpu_int_stack_top;
+				kstackb = kstackt - INTSTACK_SIZE;
+				if (csp < kstackb || csp > kstackt) {
+					kstackt = (uintptr_t)low_eintstack;
+					kstackb = (uintptr_t)low_eintstack - INTSTACK_SIZE;
+					if (csp < kstackb || csp > kstackt) {
+						kstackb = (uintptr_t) mp_slave_stack;
+						kstackt = (uintptr_t) mp_slave_stack + PAGE_SIZE;
+					}
+				}
+			}
+		}
+
+		if (kstackb) {
+			pltrace_bt(&plbts[1], MAXPLFRAMES - 1, kstackb, kstackt);
+		}
+	}
+}
+
+extern int plctrace_enabled;
+#endif /* DEVELOPMENT || DEBUG */
+
+static inline void pltrace(boolean_t plenable) {
+#if DEVELOPMENT || DEBUG
+	if (__improbable(plctrace_enabled != 0)) {
+		pltrace_internal(plenable);
+	}
+#else
+	(void)plenable;
+#endif
+}
+
 static inline void
-disable_preemption(void)
-{
+disable_preemption_internal(void) {
+	assert(get_preemption_level() >= 0);
+
+#if defined(__clang__)
+	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
+	cpu_data->cpu_preemption_level++;
+#else
 	__asm__ volatile ("incl %%gs:%P0"
-			  :
-			  : "i" (offsetof(cpu_data_t, cpu_preemption_level)));
+	    :
+	    : "i" (offsetof(cpu_data_t, cpu_preemption_level)));
+#endif
+	pltrace(FALSE);
 }
 
 static inline void
-enable_preemption(void)
-{
+enable_preemption_internal(void) {
 	assert(get_preemption_level() > 0);
-
+	pltrace(TRUE);
+#if defined(__clang__)
+	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
+	if (0 == --cpu_data->cpu_preemption_level)
+		kernel_preempt_check();
+#else
 	__asm__ volatile ("decl %%gs:%P0		\n\t"
 			  "jne 1f			\n\t"
 			  "call _kernel_preempt_check	\n\t"
@@ -265,6 +567,7 @@ enable_preemption(void)
 			  : /* no outputs */
 			  : "i" (offsetof(cpu_data_t, cpu_preemption_level))
 			  : "eax", "ecx", "edx", "cc", "memory");
+#endif
 }
 
 static inline void
@@ -272,47 +575,80 @@ enable_preemption_no_check(void)
 {
 	assert(get_preemption_level() > 0);
 
+	pltrace(TRUE);
+#if defined(__clang__)
+	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
+	cpu_data->cpu_preemption_level--;
+#else
 	__asm__ volatile ("decl %%gs:%P0"
 			  : /* no outputs */
 			  : "i" (offsetof(cpu_data_t, cpu_preemption_level))
 			  : "cc", "memory");
+#endif
+}
+
+static inline void
+_enable_preemption_no_check(void) {
+	enable_preemption_no_check();
 }
 
 static inline void
 mp_disable_preemption(void)
 {
-	disable_preemption();
+	disable_preemption_internal();
 }
 
 static inline void
-mp_enable_preemption(void)
+_mp_disable_preemption(void)
 {
-	enable_preemption();
+	disable_preemption_internal();
 }
 
 static inline void
-mp_enable_preemption_no_check(void)
+mp_enable_preemption(void)
 {
+	enable_preemption_internal();
+}
+
+static inline void
+_mp_enable_preemption(void) {
+	enable_preemption_internal();
+}
+
+static inline void
+mp_enable_preemption_no_check(void) {
 	enable_preemption_no_check();
 }
 
-static inline cpu_data_t *
-current_cpu_datap(void)
-{
-	CPU_DATA_GET(cpu_this, cpu_data_t *);
+static inline void
+_mp_enable_preemption_no_check(void) {
+	enable_preemption_no_check();
 }
 
+#ifdef XNU_KERNEL_PRIVATE
+#define disable_preemption() disable_preemption_internal()
+#define enable_preemption() enable_preemption_internal()
+#define MACHINE_PREEMPTION_MACROS (1)
+#endif
+
 static inline cpu_data_t *
-cpu_datap(int cpu)
-{
-	assert(cpu_data_ptr[cpu]);
+cpu_datap(int cpu) {
 	return cpu_data_ptr[cpu];
 }
 
-extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu);
+static inline int
+cpu_is_running(int cpu) {
+	return ((cpu_datap(cpu) != NULL) && (cpu_datap(cpu)->cpu_running));
+}
 
-#else	/* !defined(__GNUC__) */
+#ifdef MACH_KERNEL_PRIVATE
+static inline cpu_data_t *
+cpu_shadowp(int cpu) {
+	return cpu_data_ptr[cpu]->cd_shadow;
+}
 
-#endif	/* defined(__GNUC__) */
+#endif
+extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu);
+extern void cpu_data_realloc(void);
 
 #endif	/* I386_CPU_DATA */
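Finally, as a usage illustration (not part of the patch): cpu_is_running() folds together the NULL check on cpu_data_ptr[] and the cpu_running flag, so code that walks all CPU slots can skip entries that were never allocated or are offline before dereferencing them. The function name and the max_cpus_example parameter below are hypothetical placeholders.

static inline uint64_t
total_idle_exits_example(int max_cpus_example)
{
	uint64_t total = 0;
	int cpu;

	for (cpu = 0; cpu < max_cpus_example; cpu++) {
		if (!cpu_is_running(cpu))
			continue;	/* slot not allocated, or CPU not running */
		total += cpu_datap(cpu)->cpu_idle_exits;
	}
	return total;
}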