X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3a60a9f5b85abb8c2cf24e1926c5c7b3f608a5e2..refs/heads/master:/osfmk/i386/cpu_data.h diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index 172738bc9..10d235517 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -1,78 +1,179 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2020 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ - * + * */ -#ifndef I386_CPU_DATA +#ifndef I386_CPU_DATA #define I386_CPU_DATA #include - -#if defined(__GNUC__) +#include #include #include +#include +#include #include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include + +#if CONFIG_VMX +#include +#endif +#if MONOTONIC +#include +#endif /* MONOTONIC */ + +#include /* * Data structures referenced (anonymously) from per-cpu data: */ -struct cpu_core; struct cpu_cons_buffer; -struct mp_desc_table; - +struct cpu_desc_table; +struct mca_state; +struct prngContext; /* * Data structures embedded in per-cpu data: */ typedef struct rtclock_timer { - uint64_t deadline; - boolean_t is_set; - boolean_t has_expired; + mpqueue_head_t queue; + uint64_t deadline; + uint64_t when_set; + boolean_t has_expired; } rtclock_timer_t; typedef struct { - uint64_t rnt_tsc; /* timestamp */ - uint64_t rnt_nanos; /* nanoseconds */ - uint32_t rnt_scale; /* tsc -> nanosec multiplier */ - uint32_t rnt_shift; /* tsc -> nanosec shift/div */ - uint64_t rnt_step_tsc; /* tsc when scale applied */ - uint64_t rnt_step_nanos; /* ns when scale applied */ -} rtc_nanotime_t; + /* The 'u' suffixed fields store the double-mapped descriptor addresses */ + struct x86_64_tss *cdi_ktssu; + struct x86_64_tss *cdi_ktssb; + x86_64_desc_register_t cdi_gdtu; + x86_64_desc_register_t cdi_gdtb; + x86_64_desc_register_t cdi_idtu; + x86_64_desc_register_t cdi_idtb; + struct real_descriptor *cdi_ldtu; + struct real_descriptor *cdi_ldtb; + vm_offset_t cdi_sstku; + vm_offset_t cdi_sstkb; +} cpu_desc_index_t; + +typedef enum { + TASK_MAP_32BIT, /* 32-bit user, compatibility mode */ + TASK_MAP_64BIT, /* 64-bit user thread, shared space */ +} task_map_t; + +/* + * This structure is used on entry into the (uber-)kernel on syscall from + * a 64-bit user. It contains the address of the machine state save area + * for the current thread and a temporary place to save the user's rsp + * before loading this address into rsp. 
+ */ typedef struct { - struct i386_tss *cdi_ktss; -#if MACH_KDB - struct i386_tss *cdi_dbtss; -#endif /* MACH_KDB */ - struct fake_descriptor *cdi_gdt; - struct fake_descriptor *cdi_idt; - struct fake_descriptor *cdi_ldt; -} cpu_desc_index_t; + addr64_t cu_isf; /* thread->pcb->iss.isf */ + uint64_t cu_tmp; /* temporary scratch */ + addr64_t cu_user_gs_base; +} cpu_uber_t; +typedef uint16_t pcid_t; +typedef uint8_t pcid_ref_t; + +#define CPU_RTIME_BINS (12) +#define CPU_ITIME_BINS (CPU_RTIME_BINS) + +#define MAX_TRACE_BTFRAMES (16) +typedef struct { + boolean_t pltype; + int plevel; + uint64_t plbt[MAX_TRACE_BTFRAMES]; +} plrecord_t; + +#if DEVELOPMENT || DEBUG +typedef enum { + IOTRACE_PHYS_READ = 1, + IOTRACE_PHYS_WRITE, + IOTRACE_IO_READ, + IOTRACE_IO_WRITE, + IOTRACE_PORTIO_READ, + IOTRACE_PORTIO_WRITE +} iotrace_type_e; + +typedef struct { + iotrace_type_e iotype; + int size; + uint64_t vaddr; + uint64_t paddr; + uint64_t val; + uint64_t start_time_abs; + uint64_t duration; + uint64_t backtrace[MAX_TRACE_BTFRAMES]; +} iotrace_entry_t; + +typedef struct { + int vector; /* Vector number of interrupt */ + thread_t curthread; /* Current thread at the time of the interrupt */ + uint64_t interrupted_pc; + int curpl; /* Current preemption level */ + int curil; /* Current interrupt level */ + uint64_t start_time_abs; + uint64_t duration; + uint64_t backtrace[MAX_TRACE_BTFRAMES]; +} traptrace_entry_t; + +#define DEFAULT_IOTRACE_ENTRIES_PER_CPU (64) +#define IOTRACE_MAX_ENTRIES_PER_CPU (256) +extern volatile int mmiotrace_enabled; +extern int iotrace_generators; +extern int iotrace_entries_per_cpu; +extern int *iotrace_next; +extern iotrace_entry_t **iotrace_ring; + +#define TRAPTRACE_INVALID_INDEX (~0U) +#define DEFAULT_TRAPTRACE_ENTRIES_PER_CPU (16) +#define TRAPTRACE_MAX_ENTRIES_PER_CPU (256) +extern volatile int traptrace_enabled; +extern int traptrace_generators; +extern int traptrace_entries_per_cpu; +extern int *traptrace_next; +extern traptrace_entry_t **traptrace_ring; +#endif /* DEVELOPMENT || DEBUG */ /* * Per-cpu data. @@ -81,132 +182,505 @@ typedef struct { * current_cpu_datap() macro. For speed, the %gs segment is based here, and * using this, inlines provides single-instruction access to frequently used * members - such as get_cpu_number()/cpu_number(), and get_active_thread()/ - * current_thread(). - * + * current_thread(). + * * Cpu data owned by another processor can be accessed using the * cpu_datap(cpu_number) macro which uses the cpu_data_ptr[] array of per-cpu * pointers. 
*/ -typedef struct cpu_data -{ - struct cpu_data *cpu_this; /* pointer to myself */ - thread_t cpu_active_thread; - thread_t cpu_active_kloaded; - vm_offset_t cpu_active_stack; - vm_offset_t cpu_kernel_stack; - vm_offset_t cpu_int_stack_top; - int cpu_preemption_level; - int cpu_simple_lock_count; - int cpu_interrupt_level; - int cpu_number; /* Logical CPU */ - int cpu_phys_number; /* Physical CPU */ - cpu_id_t cpu_id; /* Platform Expert */ - int cpu_signals; /* IPI events */ - int cpu_mcount_off; /* mcount recursion */ - ast_t cpu_pending_ast; - int cpu_type; - int cpu_subtype; - int cpu_threadtype; - int cpu_running; - struct cpu_core *cpu_core; /* cpu's parent core */ - uint64_t cpu_rtc_tick_deadline; - uint64_t cpu_rtc_intr_deadline; - rtclock_timer_t cpu_rtc_timer; - rtc_nanotime_t cpu_rtc_nanotime; - void *cpu_console_buf; - struct processor *cpu_processor; - struct cpu_pmap *cpu_pmap; - struct mp_desc_table *cpu_desc_tablep; - cpu_desc_index_t cpu_desc_index; - boolean_t cpu_iflag; -#ifdef MACH_KDB - /* XXX Untested: */ - int cpu_db_pass_thru; - vm_offset_t cpu_db_stacks; - struct i386_saved_state *cpu_kdb_saved_state; - spl_t cpu_kdb_saved_ipl; - int cpu_kdb_is_slave; - int cpu_kdb_active; -#endif /* MACH_KDB */ - int cpu_hibernate; +typedef struct { + pcid_t cpu_pcid_free_hint; +#define PMAP_PCID_MAX_PCID (0x800) + pcid_ref_t cpu_pcid_refcounts[PMAP_PCID_MAX_PCID]; + pmap_t cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID]; +} pcid_cdata_t; + +typedef struct cpu_data { + struct pal_cpu_data cpu_pal_data; /* PAL-specific data */ +#define cpu_pd cpu_pal_data /* convenience alias */ + struct cpu_data *cpu_this; /* pointer to myself */ + vm_offset_t cpu_pcpu_base; + thread_t cpu_active_thread; + thread_t cpu_nthread; + int cpu_number; /* Logical CPU */ + void *cpu_int_state; /* interrupt state */ + vm_offset_t cpu_active_stack; /* kernel stack base */ + vm_offset_t cpu_kernel_stack; /* kernel stack top */ + vm_offset_t cpu_int_stack_top; + volatile int cpu_signals; /* IPI events */ + volatile int cpu_prior_signals; /* Last set of events, + * debugging + */ + ast_t cpu_pending_ast; + /* + * Note if rearranging fields: + * We want cpu_preemption_level on a different + * cache line than cpu_active_thread + * for optimizing mtx_spin phase. 
+ */ + int cpu_interrupt_level; + volatile int cpu_preemption_level; + volatile int cpu_running; +#if !MONOTONIC + boolean_t cpu_fixed_pmcs_enabled; +#endif /* !MONOTONIC */ + rtclock_timer_t rtclock_timer; + volatile addr64_t cpu_active_cr3 __attribute((aligned(64))); + union { + volatile uint32_t cpu_tlb_invalid; + struct { + volatile uint16_t cpu_tlb_invalid_local; + volatile uint16_t cpu_tlb_invalid_global; + }; + }; + uint64_t cpu_ip_desc[2]; + volatile task_map_t cpu_task_map; + volatile addr64_t cpu_task_cr3; + addr64_t cpu_kernel_cr3; + volatile addr64_t cpu_ucr3; + volatile addr64_t cpu_shadowtask_cr3; + boolean_t cpu_pagezero_mapped; + cpu_uber_t cpu_uber; +/* Double-mapped per-CPU exception stack address */ + uintptr_t cd_estack; + int cpu_xstate; + int cpu_curtask_has_ldt; + int cpu_curthread_do_segchk; +/* Address of shadowed, partially mirrored CPU data structures located + * in the double mapped PML4 + */ + void *cd_shadow; + union { + volatile uint32_t cpu_tlb_invalid_count; + struct { + volatile uint16_t cpu_tlb_invalid_local_count; + volatile uint16_t cpu_tlb_invalid_global_count; + }; + }; + + uint16_t cpu_tlb_gen_counts_local[MAX_CPUS]; + uint16_t cpu_tlb_gen_counts_global[MAX_CPUS]; + + struct processor *cpu_processor; + struct real_descriptor *cpu_ldtp; + struct cpu_desc_table *cpu_desc_tablep; + cpu_desc_index_t cpu_desc_index; + int cpu_ldt; + +#define HWINTCNT_SIZE 256 + uint32_t cpu_hwIntCnt[HWINTCNT_SIZE]; /* Interrupt counts */ + uint64_t cpu_hwIntpexits[HWINTCNT_SIZE]; + uint64_t cpu_dr7; /* debug control register */ + uint64_t cpu_int_event_time; /* intr entry/exit time */ + pal_rtc_nanotime_t *cpu_nanotime; /* Nanotime info */ +#if KPC + /* double-buffered performance counter data */ + uint64_t *cpu_kpc_buf[2]; + /* PMC shadow and reload value buffers */ + uint64_t *cpu_kpc_shadow; + uint64_t *cpu_kpc_reload; +#endif +#if MONOTONIC + struct mt_cpu cpu_monotonic; +#endif /* MONOTONIC */ + uint32_t cpu_pmap_pcid_enabled; + pcid_t cpu_active_pcid; + pcid_t cpu_last_pcid; + pcid_t cpu_kernel_pcid; + volatile pcid_ref_t *cpu_pmap_pcid_coherentp; + volatile pcid_ref_t *cpu_pmap_pcid_coherentp_kernel; + pcid_cdata_t *cpu_pcid_data; +#ifdef PCID_STATS + uint64_t cpu_pmap_pcid_flushes; + uint64_t cpu_pmap_pcid_preserves; +#endif + uint64_t cpu_aperf; + uint64_t cpu_mperf; + uint64_t cpu_c3res; + uint64_t cpu_c6res; + uint64_t cpu_c7res; + uint64_t cpu_itime_total; + uint64_t cpu_rtime_total; + uint64_t cpu_ixtime; + uint64_t cpu_idle_exits; + /* + * Note that the cacheline-copy mechanism uses the cpu_rtimes field in the shadow CPU + * structures to temporarily stash the code cacheline that includes the instruction + * pointer at the time of the fault (this field is otherwise unused in the shadow + * CPU structures). + */ + uint64_t cpu_rtimes[CPU_RTIME_BINS]; + uint64_t cpu_itimes[CPU_ITIME_BINS]; +#if !MONOTONIC + uint64_t cpu_cur_insns; + uint64_t cpu_cur_ucc; + uint64_t cpu_cur_urc; +#endif /* !MONOTONIC */ + uint64_t cpu_gpmcs[4]; + uint64_t cpu_max_observed_int_latency; + int cpu_max_observed_int_latency_vector; + volatile boolean_t cpu_NMI_acknowledged; + uint64_t debugger_entry_time; + uint64_t debugger_ipi_time; + /* A separate nested interrupt stack flag, to account + * for non-nested interrupts arriving while on the interrupt stack + * Currently only occurs when AICPM enables interrupts on the + * interrupt stack during processor offlining. 
+ */ + uint32_t cpu_nested_istack; + uint32_t cpu_nested_istack_events; + x86_saved_state64_t *cpu_fatal_trap_state; + x86_saved_state64_t *cpu_post_fatal_trap_state; +#if CONFIG_VMX + vmx_cpu_t cpu_vmx; /* wonderful world of virtualization */ +#endif +#if CONFIG_MCA + struct mca_state *cpu_mca_state; /* State at MC fault */ +#endif + int cpu_type; + int cpu_subtype; + int cpu_threadtype; + boolean_t cpu_iflag; + boolean_t cpu_boot_complete; + int cpu_hibernate; +#define MAX_PREEMPTION_RECORDS (8) +#if DEVELOPMENT || DEBUG + int cpu_plri; + plrecord_t plrecords[MAX_PREEMPTION_RECORDS]; +#endif + void *cpu_console_buf; + struct x86_lcpu lcpu; + int cpu_phys_number; /* Physical CPU */ + cpu_id_t cpu_id; /* Platform Expert */ +#if DEBUG + uint64_t cpu_entry_cr3; + uint64_t cpu_exit_cr3; + uint64_t cpu_pcid_last_cr3; +#endif + boolean_t cpu_rendezvous_in_progress; +#if CST_DEMOTION_DEBUG + /* Count of thread wakeups issued by this processor */ + uint64_t cpu_wakeups_issued_total; +#endif +#if DEBUG || DEVELOPMENT + uint64_t tsc_sync_delta; +#endif } cpu_data_t; -extern cpu_data_t *cpu_data_ptr[]; -extern cpu_data_t cpu_data_master; +extern cpu_data_t *cpu_data_ptr[]; -/* Macro to generate inline bodies to retrieve per-cpu data fields. */ -#define offsetof(TYPE,MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#define CPU_DATA_GET(member,type) \ - type ret; \ - __asm__ volatile ("movl %%gs:%P1,%0" \ - : "=r" (ret) \ - : "i" (offsetof(cpu_data_t,member))); \ - return ret; +/* + * __SEG_GS marks %gs-relative operations: + * https://clang.llvm.org/docs/LanguageExtensions.html#memory-references-to-specified-segments + * https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces + */ +#if defined(__SEG_GS) +// __seg_gs exists +#elif defined(__clang__) +#define __seg_gs __attribute__((address_space(256))) +#else +#error use a compiler that supports address spaces or __seg_gs +#endif + +#define CPU_DATA() ((cpu_data_t __seg_gs *)0UL) /* * Everyone within the osfmk part of the kernel can use the fast * inline versions of these routines. Everyone outside, must call * the real thing, */ + + +/* + * The "volatile" flavor of current_thread() is intended for use by + * scheduler code which may need to update the thread pointer in the + * course of a context switch. Any call to current_thread() made + * prior to the thread pointer update should be safe to optimize away + * as it should be consistent with that thread's state to the extent + * the compiler can reason about it. Likewise, the context switch + * path will eventually result in an arbitrary branch to the new + * thread's pc, about which the compiler won't be able to reason. + * Thus any compile-time optimization of current_thread() calls made + * within the new thread should be safely encapsulated in its + * register/stack state. The volatile form therefore exists to cover + * the window between the thread pointer update and the branch to + * the new pc. 
+ */ static inline thread_t -get_active_thread(void) +get_active_thread_volatile(void) { - CPU_DATA_GET(cpu_active_thread,thread_t) + return CPU_DATA()->cpu_active_thread; } -#define current_thread_fast() get_active_thread() -#define current_thread() current_thread_fast() -static inline int -get_preemption_level(void) +static inline __attribute__((const)) thread_t +get_active_thread(void) { - CPU_DATA_GET(cpu_preemption_level,int) + return CPU_DATA()->cpu_active_thread; } + +#define current_thread_fast() get_active_thread() +#define current_thread_volatile() get_active_thread_volatile() +#define current_thread() current_thread_fast() + +#define cpu_mode_is64bit() TRUE + static inline int -get_simple_lock_count(void) +get_preemption_level(void) { - CPU_DATA_GET(cpu_simple_lock_count,int) + return CPU_DATA()->cpu_preemption_level; } static inline int get_interrupt_level(void) { - CPU_DATA_GET(cpu_interrupt_level,int) + return CPU_DATA()->cpu_interrupt_level; } static inline int get_cpu_number(void) { - CPU_DATA_GET(cpu_number,int) + return CPU_DATA()->cpu_number; +} +static inline vm_offset_t +get_current_percpu_base(void) +{ + return CPU_DATA()->cpu_pcpu_base; } static inline int get_cpu_phys_number(void) { - CPU_DATA_GET(cpu_phys_number,int) + return CPU_DATA()->cpu_phys_number; } -static inline struct -cpu_core * get_cpu_core(void) + +static inline cpu_data_t * +current_cpu_datap(void) { - CPU_DATA_GET(cpu_core,struct cpu_core *) + return CPU_DATA()->cpu_this; } +/* + * Facility to diagnose preemption-level imbalances, which are otherwise + * challenging to debug. On each operation that enables or disables preemption, + * we record a backtrace into a per-CPU ring buffer, along with the current + * preemption level and operation type. Thus, if an imbalance is observed, + * one can examine these per-CPU records to determine which codepath failed + * to re-enable preemption, enabled premption without a corresponding + * disablement etc. The backtracer determines which stack is currently active, + * and uses that to perform bounds checks on unterminated stacks. + * To enable, sysctl -w machdep.pltrace=1 on DEVELOPMENT or DEBUG kernels (DRK '15) + * The bounds check currently doesn't account for non-default thread stack sizes. + */ +#if DEVELOPMENT || DEBUG static inline void -disable_preemption(void) +rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata, uint64_t frameptr, bool use_cursp) { - __asm__ volatile ("incl %%gs:%P0" - : - : "i" (offsetof(cpu_data_t, cpu_preemption_level))); + extern uint32_t low_intstack[]; /* bottom */ + extern uint32_t low_eintstack[]; /* top */ + extern char mp_slave_stack[]; + int btidx = 0; + + uint64_t kstackb, kstackt; + + /* Obtain the 'current' program counter, initial backtrace + * element. This will also indicate if we were unable to + * trace further up the stack for some reason + */ + if (use_cursp) { + __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" + : "=m" (rets[btidx++]) + : + : "rax"); + } + + thread_t cplthread = cdata->cpu_active_thread; + if (cplthread) { + uintptr_t csp; + if (use_cursp == true) { + __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):); + } else { + csp = frameptr; + } + /* Determine which stack we're on to populate stack bounds. + * We don't need to trace across stack boundaries for this + * routine. 
+ */ + kstackb = cdata->cpu_active_stack; + kstackt = kstackb + KERNEL_STACK_SIZE; + if (csp < kstackb || csp > kstackt) { + kstackt = cdata->cpu_kernel_stack; + kstackb = kstackt - KERNEL_STACK_SIZE; + if (csp < kstackb || csp > kstackt) { + kstackt = cdata->cpu_int_stack_top; + kstackb = kstackt - INTSTACK_SIZE; + if (csp < kstackb || csp > kstackt) { + kstackt = (uintptr_t)low_eintstack; + kstackb = kstackt - INTSTACK_SIZE; + if (csp < kstackb || csp > kstackt) { + kstackb = (uintptr_t) mp_slave_stack; + kstackt = kstackb + PAGE_SIZE; + } else { + kstackb = 0; + kstackt = 0; + } + } + } + } + + if (__probable(kstackb && kstackt)) { + uint64_t *cfp = (uint64_t *) frameptr; + int rbbtf; + + for (rbbtf = btidx; rbbtf < maxframes; rbbtf++) { + if (((uint64_t)cfp == 0) || (((uint64_t)cfp < kstackb) || ((uint64_t)cfp > kstackt))) { + rets[rbbtf] = 0; + continue; + } + rets[rbbtf] = *(cfp + 1); + cfp = (uint64_t *) (*cfp); + } + } + } } +__attribute__((noinline)) static inline void -enable_preemption(void) +pltrace_internal(boolean_t enable) { - assert(get_preemption_level() > 0); + cpu_data_t *cdata = current_cpu_datap(); + int cpli = cdata->cpu_preemption_level; + int cplrecord = cdata->cpu_plri; + uint64_t *plbts; + + assert(cpli >= 0); + + cdata->plrecords[cplrecord].pltype = enable; + cdata->plrecords[cplrecord].plevel = cpli; + + plbts = &cdata->plrecords[cplrecord].plbt[0]; + + cplrecord++; + + if (cplrecord >= MAX_PREEMPTION_RECORDS) { + cplrecord = 0; + } - __asm__ volatile ("decl %%gs:%P0 \n\t" - "jne 1f \n\t" - "call _kernel_preempt_check \n\t" - "1:" - : /* no outputs */ - : "i" (offsetof(cpu_data_t, cpu_preemption_level)) - : "eax", "ecx", "edx", "cc", "memory"); + cdata->cpu_plri = cplrecord; + + rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), false); +} + +extern int plctrace_enabled; + +static inline void +iotrace(iotrace_type_e type, uint64_t vaddr, uint64_t paddr, int size, uint64_t val, + uint64_t sabs, uint64_t duration) +{ + cpu_data_t *cdata; + int cpu_num, nextidx; + iotrace_entry_t *cur_iotrace_ring; + + if (__improbable(mmiotrace_enabled == 0 || iotrace_generators == 0)) { + return; + } + + cdata = current_cpu_datap(); + cpu_num = cdata->cpu_number; + nextidx = iotrace_next[cpu_num]; + cur_iotrace_ring = iotrace_ring[cpu_num]; + + cur_iotrace_ring[nextidx].iotype = type; + cur_iotrace_ring[nextidx].vaddr = vaddr; + cur_iotrace_ring[nextidx].paddr = paddr; + cur_iotrace_ring[nextidx].size = size; + cur_iotrace_ring[nextidx].val = val; + cur_iotrace_ring[nextidx].start_time_abs = sabs; + cur_iotrace_ring[nextidx].duration = duration; + + iotrace_next[cpu_num] = ((nextidx + 1) >= iotrace_entries_per_cpu) ? 0 : (nextidx + 1); + + rbtrace_bt(&cur_iotrace_ring[nextidx].backtrace[0], + MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true); +} + +static inline uint32_t +traptrace_start(int vecnum, uint64_t ipc, uint64_t sabs, uint64_t frameptr) +{ + cpu_data_t *cdata; + unsigned int cpu_num, nextidx; + traptrace_entry_t *cur_traptrace_ring; + + if (__improbable(traptrace_enabled == 0 || traptrace_generators == 0)) { + return TRAPTRACE_INVALID_INDEX; + } + + assert(ml_get_interrupts_enabled() == FALSE); + cdata = current_cpu_datap(); + cpu_num = (unsigned int)cdata->cpu_number; + nextidx = (unsigned int)traptrace_next[cpu_num]; + /* prevent nested interrupts from clobbering this record */ + traptrace_next[cpu_num] = (int)(((nextidx + 1) >= (unsigned int)traptrace_entries_per_cpu) ? 
0 : (nextidx + 1)); + + cur_traptrace_ring = traptrace_ring[cpu_num]; + + cur_traptrace_ring[nextidx].vector = vecnum; + cur_traptrace_ring[nextidx].curthread = current_thread(); + cur_traptrace_ring[nextidx].interrupted_pc = ipc; + cur_traptrace_ring[nextidx].curpl = cdata->cpu_preemption_level; + cur_traptrace_ring[nextidx].curil = cdata->cpu_interrupt_level; + cur_traptrace_ring[nextidx].start_time_abs = sabs; + cur_traptrace_ring[nextidx].duration = ~0ULL; + + rbtrace_bt(&cur_traptrace_ring[nextidx].backtrace[0], + MAX_TRACE_BTFRAMES - 1, cdata, frameptr, false); + + assert(nextidx <= 0xFFFF); + + return (uint32_t)((cpu_num << 16) | nextidx); +} + +static inline void +traptrace_end(uint32_t index, uint64_t eabs) +{ + if (index != TRAPTRACE_INVALID_INDEX) { + traptrace_entry_t *ttentp = &traptrace_ring[index >> 16][index & 0xFFFF]; + + ttentp->duration = eabs - ttentp->start_time_abs; + } +} + +#endif /* DEVELOPMENT || DEBUG */ + +__header_always_inline void +pltrace(boolean_t plenable) +{ +#if DEVELOPMENT || DEBUG + if (__improbable(plctrace_enabled != 0)) { + pltrace_internal(plenable); + } +#else + (void)plenable; +#endif +} + +static inline void +disable_preemption_internal(void) +{ + assert(get_preemption_level() >= 0); + + os_compiler_barrier(); + CPU_DATA()->cpu_preemption_level++; + os_compiler_barrier(); + pltrace(FALSE); +} + +static inline void +enable_preemption_internal(void) +{ + assert(get_preemption_level() > 0); + pltrace(TRUE); + os_compiler_barrier(); + if (0 == --CPU_DATA()->cpu_preemption_level) { + kernel_preempt_check(); + } + os_compiler_barrier(); } static inline void @@ -214,22 +688,40 @@ enable_preemption_no_check(void) { assert(get_preemption_level() > 0); - __asm__ volatile ("decl %%gs:%P0" - : /* no outputs */ - : "i" (offsetof(cpu_data_t, cpu_preemption_level)) - : "cc", "memory"); + pltrace(TRUE); + os_compiler_barrier(); + CPU_DATA()->cpu_preemption_level--; + os_compiler_barrier(); +} + +static inline void +_enable_preemption_no_check(void) +{ + enable_preemption_no_check(); } static inline void mp_disable_preemption(void) { - disable_preemption(); + disable_preemption_internal(); +} + +static inline void +_mp_disable_preemption(void) +{ + disable_preemption_internal(); } static inline void mp_enable_preemption(void) { - enable_preemption(); + enable_preemption_internal(); +} + +static inline void +_mp_enable_preemption(void) +{ + enable_preemption_internal(); } static inline void @@ -238,23 +730,39 @@ mp_enable_preemption_no_check(void) enable_preemption_no_check(); } -static inline cpu_data_t * -current_cpu_datap(void) +static inline void +_mp_enable_preemption_no_check(void) { - CPU_DATA_GET(cpu_this, cpu_data_t *); + enable_preemption_no_check(); } +#ifdef XNU_KERNEL_PRIVATE +#define disable_preemption() disable_preemption_internal() +#define enable_preemption() enable_preemption_internal() +#define MACHINE_PREEMPTION_MACROS (1) +#endif + static inline cpu_data_t * cpu_datap(int cpu) { - assert(cpu_data_ptr[cpu]); return cpu_data_ptr[cpu]; } -extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu); +static inline int +cpu_is_running(int cpu) +{ + return (cpu_datap(cpu) != NULL) && (cpu_datap(cpu)->cpu_running); +} -#else /* !defined(__GNUC__) */ +#ifdef MACH_KERNEL_PRIVATE +static inline cpu_data_t * +cpu_shadowp(int cpu) +{ + return cpu_data_ptr[cpu]->cd_shadow; +} -#endif /* defined(__GNUC__) */ +#endif +extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu); +extern void cpu_data_realloc(void); -#endif /* I386_CPU_DATA */ +#endif /* 
I386_CPU_DATA */
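
Illustrative addendum (not part of the diff above): the new header drops the old inline-asm CPU_DATA_GET() macro in favor of a %gs-relative pointer, `#define CPU_DATA() ((cpu_data_t __seg_gs *)0UL)`, so accessors such as get_cpu_number(), current_thread_fast() and disable_preemption_internal() compile down to single %gs-based memory operations. The self-contained C sketch below demonstrates that same __seg_gs idiom from x86-64 Linux user space. The fake_cpu_data_t struct, the FAKE_CPU_DATA() macro and the arch_prctl(ARCH_SET_GS, ...) harness are hypothetical stand-ins for illustration only; xnu itself points the kernel %gs base at the real cpu_data_t while bringing a CPU up, not via arch_prctl().

/*
 * Illustrative sketch only -- not part of osfmk/i386/cpu_data.h.
 * Shows the __seg_gs named-address-space idiom used by CPU_DATA() above,
 * exercised from x86-64 Linux user space.  fake_cpu_data_t, FAKE_CPU_DATA()
 * and the arch_prctl() setup are hypothetical stand-ins.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>                  /* ARCH_SET_GS */

#if !defined(__SEG_GS)
#error "requires gcc >= 6 or clang with x86 named address spaces"
#endif

/* Hypothetical miniature of cpu_data_t, for illustration only. */
typedef struct fake_cpu_data {
	struct fake_cpu_data *cpu_this;              /* pointer to myself */
	int                   cpu_number;            /* logical CPU */
	volatile int          cpu_preemption_level;
} fake_cpu_data_t;

/*
 * Same shape as CPU_DATA() in the header: a %gs-relative pointer with base 0,
 * so each member access compiles to a single "mov %gs:offset" style operation.
 */
#define FAKE_CPU_DATA() ((fake_cpu_data_t __seg_gs *)0UL)

static fake_cpu_data_t fake_cdt;        /* stand-in for per-CPU storage */

int
main(void)
{
	fake_cdt.cpu_this = &fake_cdt;
	fake_cdt.cpu_number = 0;
	fake_cdt.cpu_preemption_level = 0;

	/* Point the %gs base at the fake per-CPU area (Linux-only harness). */
	if (syscall(SYS_arch_prctl, ARCH_SET_GS, (unsigned long)&fake_cdt) != 0) {
		perror("arch_prctl(ARCH_SET_GS)");
		return 1;
	}

	/* Loosely mirrors disable_preemption_internal(): barrier, bump, barrier. */
	__atomic_signal_fence(__ATOMIC_SEQ_CST);     /* ~ os_compiler_barrier() */
	FAKE_CPU_DATA()->cpu_preemption_level++;
	__atomic_signal_fence(__ATOMIC_SEQ_CST);

	printf("cpu %d, preemption level %d\n",
	    FAKE_CPU_DATA()->cpu_number,
	    FAKE_CPU_DATA()->cpu_preemption_level);

	/* Loosely mirrors enable_preemption_no_check(): drop the level again. */
	FAKE_CPU_DATA()->cpu_preemption_level--;
	return 0;
}

Build with a compiler that predefines __SEG_GS (GCC 6+ or a recent Clang targeting x86-64); inspecting the generated assembly should show the member accesses as %gs:offset loads and stores, which is what makes the kernel's current_thread()/get_cpu_number() single-instruction as the header's comment describes.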