2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
36 #include <mach_assert.h>
38 #include <kern/assert.h>
39 #include <kern/kern_types.h>
40 #include <kern/queue.h>
41 #include <kern/processor.h>
43 #include <pexpert/pexpert.h>
44 #include <mach/i386/thread_status.h>
45 #include <mach/i386/vm_param.h>
46 #include <i386/locks.h>
47 #include <i386/rtclock_protos.h>
48 #include <i386/pmCPU.h>
49 #include <i386/cpu_topology.h>
53 #include <i386/vmx/vmx_cpu.h>
57 #include <machine/monotonic.h>
58 #endif /* MONOTONIC */
60 #include <machine/pal_routines.h>
63 * Data structures referenced (anonymously) from per-cpu data:
65 struct cpu_cons_buffer
;
66 struct cpu_desc_table
;
71 * Data structures embedded in per-cpu data:
73 typedef struct rtclock_timer
{
77 boolean_t has_expired
;
81 /* The 'u' suffixed fields store the double-mapped descriptor addresses */
82 struct x86_64_tss
*cdi_ktssu
;
83 struct x86_64_tss
*cdi_ktssb
;
84 x86_64_desc_register_t cdi_gdtu
;
85 x86_64_desc_register_t cdi_gdtb
;
86 x86_64_desc_register_t cdi_idtu
;
87 x86_64_desc_register_t cdi_idtb
;
88 struct fake_descriptor
*cdi_ldtu
;
89 struct fake_descriptor
*cdi_ldtb
;
90 vm_offset_t cdi_sstku
;
91 vm_offset_t cdi_sstkb
;
95 TASK_MAP_32BIT
, /* 32-bit user, compatibility mode */
96 TASK_MAP_64BIT
, /* 64-bit user thread, shared space */
101 * This structure is used on entry into the (uber-)kernel on syscall from
102 * a 64-bit user. It contains the address of the machine state save area
103 * for the current thread and a temporary place to save the user's rsp
104 * before loading this address into rsp.
107 addr64_t cu_isf
; /* thread->pcb->iss.isf */
108 uint64_t cu_tmp
; /* temporary scratch */
109 addr64_t cu_user_gs_base
;
/*
 * pcid_t is the type of the per-cpu PCID fields (cpu_active_pcid,
 * cpu_last_pcid, cpu_kernel_pcid, cpu_pcid_free_hint). Note that
 * PMAP_PCID_MAX_PCID is 0x800, which does not fit in 8 bits.
 */
112 typedef uint16_t pcid_t
;
/*
 * pcid_ref_t is the element type of the per-PCID cpu_pcid_refcounts[]
 * array and the pointee type of the cpu_pmap_pcid_coherentp pointers.
 */
113 typedef uint8_t pcid_ref_t
;
/* Number of entries in the per-cpu cpu_rtimes[] array. */
115 #define CPU_RTIME_BINS (12)
/* Number of entries in the per-cpu cpu_itimes[] array; defined as CPU_RTIME_BINS. */
116 #define CPU_ITIME_BINS (CPU_RTIME_BINS)
/* Number of backtrace slots (plbt[]) in each preemption-level trace record. */
118 #define MAXPLFRAMES (16)
122 uint64_t plbt
[MAXPLFRAMES
];
128 * Each processor has a per-cpu data area which is dereferenced through the
129 * current_cpu_datap() macro. For speed, the %gs segment is based here, and
130 * using this, inlines provide single-instruction access to frequently used
131 * members - such as get_cpu_number()/cpu_number(), and get_active_thread()/
134 * Cpu data owned by another processor can be accessed using the
135 * cpu_datap(cpu_number) macro which uses the cpu_data_ptr[] array of per-cpu
139 pcid_t cpu_pcid_free_hint
;
/* Length of the per-PCID arrays cpu_pcid_refcounts[] and cpu_pcid_last_pmap_dispatched[]. */
140 #define PMAP_PCID_MAX_PCID (0x800)
141 pcid_ref_t cpu_pcid_refcounts
[PMAP_PCID_MAX_PCID
];
142 pmap_t cpu_pcid_last_pmap_dispatched
[PMAP_PCID_MAX_PCID
];
145 typedef struct cpu_data
147 struct pal_cpu_data cpu_pal_data
; /* PAL-specific data */
148 #define cpu_pd cpu_pal_data /* convenience alias */
149 struct cpu_data
*cpu_this
; /* pointer to myself */
150 thread_t cpu_active_thread
;
151 thread_t cpu_nthread
;
152 volatile int cpu_preemption_level
;
153 int cpu_number
; /* Logical CPU */
154 void *cpu_int_state
; /* interrupt state */
155 vm_offset_t cpu_active_stack
; /* kernel stack base */
156 vm_offset_t cpu_kernel_stack
; /* kernel stack top */
157 vm_offset_t cpu_int_stack_top
;
158 int cpu_interrupt_level
;
159 volatile int cpu_signals
; /* IPI events */
160 volatile int cpu_prior_signals
; /* Last set of events,
163 ast_t cpu_pending_ast
;
164 volatile int cpu_running
;
166 boolean_t cpu_fixed_pmcs_enabled
;
167 #endif /* !MONOTONIC */
168 rtclock_timer_t rtclock_timer
;
169 uint64_t quantum_timer_deadline
;
170 volatile addr64_t cpu_active_cr3
__attribute((aligned(64)));
172 volatile uint32_t cpu_tlb_invalid
;
174 volatile uint16_t cpu_tlb_invalid_local
;
175 volatile uint16_t cpu_tlb_invalid_global
;
178 volatile task_map_t cpu_task_map
;
179 volatile addr64_t cpu_task_cr3
;
180 addr64_t cpu_kernel_cr3
;
181 volatile addr64_t cpu_ucr3
;
182 boolean_t cpu_pagezero_mapped
;
184 /* Double-mapped per-CPU exception stack address */
187 /* Address of shadowed, partially mirrored CPU data structures located
188 * in the double mapped PML4
191 struct processor
*cpu_processor
;
192 #if NCOPY_WINDOWS > 0
193 struct cpu_pmap
*cpu_pmap
;
195 struct real_descriptor
*cpu_ldtp
;
196 struct cpu_desc_table
*cpu_desc_tablep
;
197 cpu_desc_index_t cpu_desc_index
;
199 #if NCOPY_WINDOWS > 0
200 vm_offset_t cpu_copywindow_base
;
201 uint64_t *cpu_copywindow_pdp
;
203 vm_offset_t cpu_physwindow_base
;
204 uint64_t *cpu_physwindow_ptep
;
/* Length of the per-cpu cpu_hwIntCnt[] and cpu_hwIntpexits[] arrays. */
207 #define HWINTCNT_SIZE 256
208 uint32_t cpu_hwIntCnt
[HWINTCNT_SIZE
]; /* Interrupt counts */
209 uint64_t cpu_hwIntpexits
[HWINTCNT_SIZE
];
210 uint64_t cpu_dr7
; /* debug control register */
211 uint64_t cpu_int_event_time
; /* intr entry/exit time */
212 pal_rtc_nanotime_t
*cpu_nanotime
; /* Nanotime info */
214 /* double-buffered performance counter data */
215 uint64_t *cpu_kpc_buf
[2];
216 /* PMC shadow and reload value buffers */
217 uint64_t *cpu_kpc_shadow
;
218 uint64_t *cpu_kpc_reload
;
221 struct mt_cpu cpu_monotonic
;
222 #endif /* MONOTONIC */
223 uint32_t cpu_pmap_pcid_enabled
;
224 pcid_t cpu_active_pcid
;
225 pcid_t cpu_last_pcid
;
226 pcid_t cpu_kernel_pcid
;
227 volatile pcid_ref_t
*cpu_pmap_pcid_coherentp
;
228 volatile pcid_ref_t
*cpu_pmap_pcid_coherentp_kernel
;
229 pcid_cdata_t
*cpu_pcid_data
;
231 uint64_t cpu_pmap_pcid_flushes
;
232 uint64_t cpu_pmap_pcid_preserves
;
239 uint64_t cpu_itime_total
;
240 uint64_t cpu_rtime_total
;
242 uint64_t cpu_idle_exits
;
243 uint64_t cpu_rtimes
[CPU_RTIME_BINS
];
244 uint64_t cpu_itimes
[CPU_ITIME_BINS
];
246 uint64_t cpu_cur_insns
;
247 uint64_t cpu_cur_ucc
;
248 uint64_t cpu_cur_urc
;
249 #endif /* !MONOTONIC */
250 uint64_t cpu_gpmcs
[4];
251 uint64_t cpu_max_observed_int_latency
;
252 int cpu_max_observed_int_latency_vector
;
253 volatile boolean_t cpu_NMI_acknowledged
;
254 uint64_t debugger_entry_time
;
255 uint64_t debugger_ipi_time
;
256 /* A separate nested interrupt stack flag, to account
257 * for non-nested interrupts arriving while on the interrupt stack.
258 * Currently only occurs when AICPM enables interrupts on the
259 * interrupt stack during processor offlining.
261 uint32_t cpu_nested_istack
;
262 uint32_t cpu_nested_istack_events
;
263 x86_saved_state64_t
*cpu_fatal_trap_state
;
264 x86_saved_state64_t
*cpu_post_fatal_trap_state
;
266 vmx_cpu_t cpu_vmx
; /* wonderful world of virtualization */
269 struct mca_state
*cpu_mca_state
; /* State at MC fault */
271 struct prngContext
*cpu_prng
; /* PRNG's context */
276 boolean_t cpu_boot_complete
;
/*
 * Length of the per-cpu plrecords[] array; pltrace_internal() compares its
 * record index (cpu_plri) against this bound.
 */
278 #define MAX_PREEMPTION_RECORDS (8)
279 #if DEVELOPMENT || DEBUG
281 plrecord_t plrecords
[MAX_PREEMPTION_RECORDS
];
283 void *cpu_console_buf
;
284 struct x86_lcpu lcpu
;
285 int cpu_phys_number
; /* Physical CPU */
286 cpu_id_t cpu_id
; /* Platform Expert */
288 uint64_t cpu_entry_cr3
;
289 uint64_t cpu_exit_cr3
;
290 uint64_t cpu_pcid_last_cr3
;
/*
 * Table of per-cpu data pointers, indexed by cpu number and read through
 * cpu_datap(). A slot may be NULL: cpu_is_running() tests for that before
 * dereferencing the entry.
 */
294 extern cpu_data_t
*cpu_data_ptr
[];
296 /* Macro to generate inline bodies to retrieve per-cpu data fields. */
297 #if defined(__clang__)
/*
 * Pointer qualifier used by the clang CPU_DATA_* macros: a volatile access
 * through clang address space 256, which on x86 targets selects the %gs
 * segment (the per-cpu data area is based at %gs).
 */
298 #define GS_RELATIVE volatile __attribute__((address_space(256)))
300 #define offsetof(TYPE,MEMBER) __builtin_offsetof(TYPE,MEMBER)
303 #define CPU_DATA_GET(member,type) \
304 cpu_data_t GS_RELATIVE *cpu_data = \
305 (cpu_data_t GS_RELATIVE *)0UL; \
307 ret = cpu_data->member; \
310 #define CPU_DATA_GET_INDEX(member,index,type) \
311 cpu_data_t GS_RELATIVE *cpu_data = \
312 (cpu_data_t GS_RELATIVE *)0UL; \
314 ret = cpu_data->member[index]; \
/*
 * Store `value` into field `member` of the current cpu's cpu_data_t by
 * writing through a GS_RELATIVE pointer at offset 0. The expansion is a
 * declaration of a local named cpu_data followed by an assignment, with no
 * enclosing braces, so it must appear where such a declaration is legal.
 */
317 #define CPU_DATA_SET(member,value) \
318 cpu_data_t GS_RELATIVE *cpu_data = \
319 (cpu_data_t GS_RELATIVE *)0UL; \
320 cpu_data->member = value;
322 #define CPU_DATA_XCHG(member,value,type) \
323 cpu_data_t GS_RELATIVE *cpu_data = \
324 (cpu_data_t GS_RELATIVE *)0UL; \
326 ret = cpu_data->member; \
327 cpu_data->member = value; \
330 #else /* !defined(__clang__) */
333 #define offsetof(TYPE,MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
334 #endif /* offsetof */
335 #define CPU_DATA_GET(member,type) \
337 __asm__ volatile ("mov %%gs:%P1,%0" \
339 : "i" (offsetof(cpu_data_t,member))); \
342 #define CPU_DATA_GET_INDEX(member,index,type) \
344 __asm__ volatile ("mov %%gs:(%1),%0" \
346 : "r" (offsetof(cpu_data_t,member[index]))); \
349 #define CPU_DATA_SET(member,value) \
350 __asm__ volatile ("mov %0,%%gs:%P1" \
352 : "r" (value), "i" (offsetof(cpu_data_t,member)));
354 #define CPU_DATA_XCHG(member,value,type) \
356 __asm__ volatile ("xchg %0,%%gs:%P1" \
358 : "i" (offsetof(cpu_data_t,member)), "0" (value)); \
361 #endif /* !defined(__clang__) */
364 * Everyone within the osfmk part of the kernel can use the fast
365 * inline versions of these routines. Everyone outside must call
368 static inline thread_t
369 get_active_thread(void)
371 CPU_DATA_GET(cpu_active_thread
,thread_t
)
/* current_thread() and current_thread_fast() both expand to the inline get_active_thread(). */
373 #define current_thread_fast() get_active_thread()
374 #define current_thread() current_thread_fast()
/* Always evaluates to TRUE. */
376 #define cpu_mode_is64bit() TRUE
379 get_preemption_level(void)
381 CPU_DATA_GET(cpu_preemption_level
,int)
384 get_interrupt_level(void)
386 CPU_DATA_GET(cpu_interrupt_level
,int)
391 CPU_DATA_GET(cpu_number
,int)
394 get_cpu_phys_number(void)
396 CPU_DATA_GET(cpu_phys_number
,int)
399 static inline cpu_data_t
*
400 current_cpu_datap(void) {
401 CPU_DATA_GET(cpu_this
, cpu_data_t
*);
405 * Facility to diagnose preemption-level imbalances, which are otherwise
406 * challenging to debug. On each operation that enables or disables preemption,
407 * we record a backtrace into a per-CPU ring buffer, along with the current
408 * preemption level and operation type. Thus, if an imbalance is observed,
409 * one can examine these per-CPU records to determine which codepath failed
410 * to re-enable preemption, enabled preemption without a corresponding
411 * disablement etc. The backtracer determines which stack is currently active,
412 * and uses that to perform bounds checks on unterminated stacks.
413 * To enable, sysctl -w machdep.pltrace=1 on DEVELOPMENT or DEBUG kernels (DRK '15)
414 * The bounds check currently doesn't account for non-default thread stack sizes.
416 #if DEVELOPMENT || DEBUG
417 static inline void pltrace_bt(uint64_t *rets
, int maxframes
, uint64_t stacklo
, uint64_t stackhi
) {
418 uint64_t *cfp
= (uint64_t *) __builtin_frame_address(0);
421 assert(stacklo
!=0 && stackhi
!=0);
423 for (plbtf
= 0; plbtf
< maxframes
; plbtf
++) {
424 if (((uint64_t)cfp
== 0) || (((uint64_t)cfp
< stacklo
) || ((uint64_t)cfp
> stackhi
))) {
428 rets
[plbtf
] = *(cfp
+ 1);
429 cfp
= (uint64_t *) (*cfp
);
/*
 * Stack regions defined elsewhere. pltrace_internal() uses them as fallback
 * stack bounds: low_eintstack as a stack top with INTSTACK_SIZE bytes below
 * it, and mp_slave_stack as a PAGE_SIZE-byte stack.
 */
434 extern uint32_t low_intstack
[]; /* bottom */
435 extern uint32_t low_eintstack
[]; /* top */
436 extern char mp_slave_stack
[PAGE_SIZE
];
438 static inline void pltrace_internal(boolean_t enable
) {
439 cpu_data_t
*cdata
= current_cpu_datap();
440 int cpli
= cdata
->cpu_preemption_level
;
441 int cplrecord
= cdata
->cpu_plri
;
442 uint64_t kstackb
, kstackt
, *plbts
;
446 cdata
->plrecords
[cplrecord
].pltype
= enable
;
447 cdata
->plrecords
[cplrecord
].plevel
= cpli
;
449 plbts
= &cdata
->plrecords
[cplrecord
].plbt
[0];
453 if (cplrecord
>= MAX_PREEMPTION_RECORDS
) {
457 cdata
->cpu_plri
= cplrecord
;
458 /* Obtain the 'current' program counter, initial backtrace
459 * element. This will also indicate if we were unable to
460 * trace further up the stack for some reason
462 __asm__
volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
468 thread_t cplthread
= cdata
->cpu_active_thread
;
471 __asm__
__volatile__ ("movq %%rsp, %0": "=r" (csp
):);
472 /* Determine which stack we're on to populate stack bounds.
473 * We don't need to trace across stack boundaries for this
476 kstackb
= cdata
->cpu_active_stack
;
477 kstackt
= kstackb
+ KERNEL_STACK_SIZE
;
478 if (csp
< kstackb
|| csp
> kstackt
) {
479 kstackt
= cdata
->cpu_kernel_stack
;
480 kstackb
= kstackb
- KERNEL_STACK_SIZE
;
481 if (csp
< kstackb
|| csp
> kstackt
) {
482 kstackt
= cdata
->cpu_int_stack_top
;
483 kstackb
= kstackt
- INTSTACK_SIZE
;
484 if (csp
< kstackb
|| csp
> kstackt
) {
485 kstackt
= (uintptr_t)low_eintstack
;
486 kstackb
= (uintptr_t)low_eintstack
- INTSTACK_SIZE
;
487 if (csp
< kstackb
|| csp
> kstackt
) {
488 kstackb
= (uintptr_t) mp_slave_stack
;
489 kstackt
= (uintptr_t) mp_slave_stack
+ PAGE_SIZE
;
496 pltrace_bt(&plbts
[1], MAXPLFRAMES
- 1, kstackb
, kstackt
);
/* When nonzero, pltrace() calls pltrace_internal() to record the operation. */
501 extern int plctrace_enabled
;
502 #endif /* DEVELOPMENT || DEBUG */
504 static inline void pltrace(boolean_t plenable
) {
505 #if DEVELOPMENT || DEBUG
506 if (__improbable(plctrace_enabled
!= 0)) {
507 pltrace_internal(plenable
);
515 disable_preemption_internal(void) {
516 assert(get_preemption_level() >= 0);
518 #if defined(__clang__)
519 cpu_data_t GS_RELATIVE
*cpu_data
= (cpu_data_t GS_RELATIVE
*)0UL;
520 cpu_data
->cpu_preemption_level
++;
522 __asm__
volatile ("incl %%gs:%P0"
524 : "i" (offsetof(cpu_data_t
, cpu_preemption_level
)));
530 enable_preemption_internal(void) {
531 assert(get_preemption_level() > 0);
533 #if defined(__clang__)
534 cpu_data_t GS_RELATIVE
*cpu_data
= (cpu_data_t GS_RELATIVE
*)0UL;
535 if (0 == --cpu_data
->cpu_preemption_level
)
536 kernel_preempt_check();
538 __asm__
volatile ("decl %%gs:%P0 \n\t"
540 "call _kernel_preempt_check \n\t"
543 : "i" (offsetof(cpu_data_t
, cpu_preemption_level
))
544 : "eax", "ecx", "edx", "cc", "memory");
549 enable_preemption_no_check(void)
551 assert(get_preemption_level() > 0);
554 #if defined(__clang__)
555 cpu_data_t GS_RELATIVE
*cpu_data
= (cpu_data_t GS_RELATIVE
*)0UL;
556 cpu_data
->cpu_preemption_level
--;
558 __asm__
volatile ("decl %%gs:%P0"
560 : "i" (offsetof(cpu_data_t
, cpu_preemption_level
))
566 _enable_preemption_no_check(void) {
567 enable_preemption_no_check();
571 mp_disable_preemption(void)
573 disable_preemption_internal();
577 _mp_disable_preemption(void)
579 disable_preemption_internal();
583 mp_enable_preemption(void)
585 enable_preemption_internal();
589 _mp_enable_preemption(void) {
590 enable_preemption_internal();
594 mp_enable_preemption_no_check(void) {
595 enable_preemption_no_check();
599 _mp_enable_preemption_no_check(void) {
600 enable_preemption_no_check();
603 #ifdef XNU_KERNEL_PRIVATE
/* disable_preemption()/enable_preemption() expand directly to the inline *_internal() routines. */
604 #define disable_preemption() disable_preemption_internal()
605 #define enable_preemption() enable_preemption_internal()
/* NOTE(review): presumably signals to other headers that this file supplies the preemption macros — confirm against users of this symbol. */
606 #define MACHINE_PREEMPTION_MACROS (1)
609 static inline cpu_data_t
*
611 return cpu_data_ptr
[cpu
];
615 cpu_is_running(int cpu
) {
616 return ((cpu_datap(cpu
) != NULL
) && (cpu_datap(cpu
)->cpu_running
));
619 #ifdef MACH_KERNEL_PRIVATE
620 static inline cpu_data_t
*
621 cpu_shadowp(int cpu
) {
622 return cpu_data_ptr
[cpu
]->cd_shadow
;
/*
 * Per-cpu data allocation routines, defined elsewhere.
 * cpu_data_alloc() returns a cpu_data_t pointer; is_boot_cpu is a boolean
 * that, by its name, distinguishes the boot processor — TODO confirm the
 * exact effect against the definition.
 * NOTE(review): the semantics of cpu_data_realloc() are not visible from
 * this header; see its definition.
 */
626 extern cpu_data_t
*cpu_data_alloc(boolean_t is_boot_cpu
);
627 extern void cpu_data_realloc(void);
629 #endif /* I386_CPU_DATA */