]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/cpu_data.h
xnu-3789.70.16.tar.gz
[apple/xnu.git] / osfmk / i386 / cpu_data.h
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 *
31 */
32
33 #ifndef I386_CPU_DATA
34 #define I386_CPU_DATA
35
36 #include <mach_assert.h>
37
38 #include <kern/assert.h>
39 #include <kern/kern_types.h>
40 #include <kern/queue.h>
41 #include <kern/processor.h>
42 #include <kern/pms.h>
43 #include <pexpert/pexpert.h>
44 #include <mach/i386/thread_status.h>
45 #include <mach/i386/vm_param.h>
46 #include <i386/locks.h>
47 #include <i386/rtclock_protos.h>
48 #include <i386/pmCPU.h>
49 #include <i386/cpu_topology.h>
50
51 #if CONFIG_VMX
52 #include <i386/vmx/vmx_cpu.h>
53 #endif
54
55 #include <machine/pal_routines.h>
56
57 /*
58 * Data structures referenced (anonymously) from per-cpu data:
59 */
/* Forward declarations only: this header never looks inside these types. */
struct cpu_cons_buffer;
struct cpu_desc_table;		/* pointed to by cpu_data_t.cpu_desc_tablep */
struct mca_state;		/* pointed to by cpu_data_t.cpu_mca_state (CONFIG_MCA) */
struct prngContext;		/* pointed to by cpu_data_t.cpu_prng */
64
65 /*
66 * Data structures embedded in per-cpu data:
67 */
68 typedef struct rtclock_timer {
69 mpqueue_head_t queue;
70 uint64_t deadline;
71 uint64_t when_set;
72 boolean_t has_expired;
73 } rtclock_timer_t;
74
75
/*
 * Per-cpu index of descriptor-table related pointers; embedded in cpu_data_t
 * as `cpu_desc_index'.
 */
typedef struct {
	struct x86_64_tss	*cdi_ktss;
	/*
	 * Packed { 16-bit size, pointer } pair, with no padding between the
	 * two members.
	 * NOTE(review): presumably the in-memory operand format consumed by
	 * lgdt/lidt -- confirm against the code that loads these.
	 */
	struct __attribute__((packed)) {
		uint16_t size;
		void *ptr;
	} cdi_gdt, cdi_idt;
	struct fake_descriptor	*cdi_ldt;
	vm_offset_t		cdi_sstk;
} cpu_desc_index_t;
85
/* Address-space flavour of a task; stored per-cpu in cpu_data_t.cpu_task_map. */
typedef enum {
	TASK_MAP_32BIT = 0,	/* 32-bit user, compatibility mode */
	TASK_MAP_64BIT = 1	/* 64-bit user thread, shared space */
} task_map_t;
90
91
92 /*
93 * This structure is used on entry into the (uber-)kernel on syscall from
94 * a 64-bit user. It contains the address of the machine state save area
95 * for the current thread and a temporary place to save the user's rsp
96 * before loading this address into rsp.
97 */
/*
 * Embedded in cpu_data_t as `cpu_uber'.
 * NOTE(review): presumably accessed by offset from the syscall entry
 * assembly, so member order would matter -- confirm before reordering.
 */
typedef struct {
	addr64_t	cu_isf;		/* thread->pcb->iss.isf */
	uint64_t	cu_tmp;		/* temporary scratch */
	addr64_t	cu_user_gs_base;
} cpu_uber_t;
103
/* Type of the cpu_*_pcid members and cpu_pcid_free_hint in cpu_data_t. */
typedef	uint16_t	pcid_t;
/* Element type of cpu_data_t.cpu_pcid_refcounts[]. */
typedef	uint8_t		pcid_ref_t;

/* Number of entries in cpu_data_t.cpu_rtimes[] and cpu_itimes[] respectively. */
#define CPU_RTIME_BINS (12)
#define CPU_ITIME_BINS (CPU_RTIME_BINS)

/* Number of backtrace slots (plbt[]) in each plrecord_t. */
#define MAXPLFRAMES (32)
/*
 * One preemption-level trace record, filled in by pltrace_internal().
 */
typedef struct {
	boolean_t	pltype;		/* argument given to pltrace(): TRUE from the
					 * enable paths, FALSE from the disable path */
	int		plevel;		/* cpu_preemption_level when recorded */
	uint64_t	plbt[MAXPLFRAMES];	/* [0]: PC captured inside
						 * pltrace_internal(); [1..]: return
						 * addresses from pltrace_bt(), 0 where
						 * the walk could not continue */
} plrecord_t;
116
/*
 * Per-cpu data.
 *
 * Each processor has a per-cpu data area which is dereferenced through the
 * current_cpu_datap() inline function. For speed, the %gs segment is based
 * here, and using this, inline functions provide single-instruction access
 * to frequently used members - such as get_cpu_number()/cpu_number(), and
 * get_active_thread()/current_thread().
 *
 * Cpu data owned by another processor can be accessed using the
 * cpu_datap(cpu_number) inline function, which uses the cpu_data_ptr[] array
 * of per-cpu pointers.
 */
/*
 * Per-processor state block.
 *
 * Members are reached %gs-relative by the CPU_DATA_* accessor macros: the
 * non-clang variants address them as %gs:offsetof(cpu_data_t, member), and
 * the clang variants dereference a GS_RELATIVE pointer whose value is 0.
 * Both forms therefore expect this structure to start at offset 0 of the
 * %gs segment.
 * NOTE(review): assembly elsewhere presumably depends on member offsets as
 * well -- confirm before inserting or reordering members.
 */
typedef struct cpu_data
{
	struct pal_cpu_data	cpu_pal_data;		/* PAL-specific data */
#define				cpu_pd cpu_pal_data	/* convenience alias */
	struct cpu_data		*cpu_this;		/* pointer to myself */
							/* (value returned by current_cpu_datap()) */
	thread_t		cpu_active_thread;	/* value returned by get_active_thread() */
	thread_t		cpu_nthread;
	volatile int		cpu_preemption_level;	/* ++ in disable_preemption_internal(),
							 * -- in the enable_preemption_* paths */
	int			cpu_number;		/* Logical CPU */
	void			*cpu_int_state;		/* interrupt state */
	vm_offset_t		cpu_active_stack;	/* kernel stack base */
	vm_offset_t		cpu_kernel_stack;	/* kernel stack top */
	vm_offset_t		cpu_int_stack_top;
	int			cpu_interrupt_level;
	int			cpu_phys_number;	/* Physical CPU */
	cpu_id_t		cpu_id;			/* Platform Expert */
	volatile int		cpu_signals;		/* IPI events */
	volatile int		cpu_prior_signals;	/* Last set of events,
							 * debugging
							 */
	ast_t			cpu_pending_ast;
	volatile int		cpu_running;
	boolean_t		cpu_fixed_pmcs_enabled;
	rtclock_timer_t		rtclock_timer;
	/*
	 * The attribute forces 64-byte alignment of this member.
	 * NOTE(review): presumably to start a fresh cache line -- confirm.
	 */
	volatile addr64_t	cpu_active_cr3 __attribute((aligned(64)));
	/* 32-bit view overlaying the two 16-bit local/global members. */
	union {
		volatile uint32_t cpu_tlb_invalid;
		struct {
			volatile uint16_t cpu_tlb_invalid_local;
			volatile uint16_t cpu_tlb_invalid_global;
		};
	};
	volatile task_map_t	cpu_task_map;
	volatile addr64_t	cpu_task_cr3;
	addr64_t		cpu_kernel_cr3;
	boolean_t		cpu_pagezero_mapped;
	cpu_uber_t		cpu_uber;
	void			*cpu_chud;
	void			*cpu_console_buf;
	struct x86_lcpu		lcpu;
	struct processor	*cpu_processor;
#if NCOPY_WINDOWS > 0
	struct cpu_pmap		*cpu_pmap;
#endif
	struct cpu_desc_table	*cpu_desc_tablep;
	struct fake_descriptor	*cpu_ldtp;
	cpu_desc_index_t	cpu_desc_index;
	int			cpu_ldt;
#if NCOPY_WINDOWS > 0
	vm_offset_t		cpu_copywindow_base;
	uint64_t		*cpu_copywindow_pdp;

	vm_offset_t		cpu_physwindow_base;
	uint64_t		*cpu_physwindow_ptep;
#endif

#define HWINTCNT_SIZE 256
	uint32_t		cpu_hwIntCnt[HWINTCNT_SIZE];	/* Interrupt counts */
	uint64_t		cpu_hwIntpexits[HWINTCNT_SIZE];
	uint64_t		cpu_hwIntcexits[HWINTCNT_SIZE];
	uint64_t		cpu_dr7;		/* debug control register */
	uint64_t		cpu_int_event_time;	/* intr entry/exit time */
	pal_rtc_nanotime_t	*cpu_nanotime;		/* Nanotime info */
#if KPC
	/* double-buffered performance counter data */
	uint64_t		*cpu_kpc_buf[2];
	/* PMC shadow and reload value buffers */
	uint64_t		*cpu_kpc_shadow;
	uint64_t		*cpu_kpc_reload;
#endif
	uint32_t		cpu_pmap_pcid_enabled;
	pcid_t			cpu_active_pcid;
	pcid_t			cpu_last_pcid;
	pcid_t			cpu_kernel_pcid;
	volatile pcid_ref_t	*cpu_pmap_pcid_coherentp;
	volatile pcid_ref_t	*cpu_pmap_pcid_coherentp_kernel;
	/* Number of entries in the two PCID-indexed arrays below. */
#define PMAP_PCID_MAX_PCID (0x1000)
	pcid_t			cpu_pcid_free_hint;
	pcid_ref_t		cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];
	pmap_t			cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];
#ifdef PCID_STATS
	uint64_t		cpu_pmap_pcid_flushes;
	uint64_t		cpu_pmap_pcid_preserves;
#endif
	uint64_t		cpu_aperf;
	uint64_t		cpu_mperf;
	uint64_t		cpu_c3res;
	uint64_t		cpu_c6res;
	uint64_t		cpu_c7res;
	uint64_t		cpu_itime_total;
	uint64_t		cpu_rtime_total;
	uint64_t		cpu_ixtime;
	uint64_t		cpu_idle_exits;
	uint64_t		cpu_rtimes[CPU_RTIME_BINS];
	uint64_t		cpu_itimes[CPU_ITIME_BINS];
	uint64_t		cpu_cur_insns;
	uint64_t		cpu_cur_ucc;
	uint64_t		cpu_cur_urc;
	uint64_t		cpu_gpmcs[4];
	uint64_t		cpu_max_observed_int_latency;
	int			cpu_max_observed_int_latency_vector;
	volatile boolean_t	cpu_NMI_acknowledged;
	uint64_t		debugger_entry_time;
	uint64_t		debugger_ipi_time;
	/* A separate nested interrupt stack flag, to account
	 * for non-nested interrupts arriving while on the interrupt stack
	 * Currently only occurs when AICPM enables interrupts on the
	 * interrupt stack during processor offlining.
	 */
	uint32_t		cpu_nested_istack;
	uint32_t		cpu_nested_istack_events;
	x86_saved_state64_t	*cpu_fatal_trap_state;
	x86_saved_state64_t	*cpu_post_fatal_trap_state;
#if CONFIG_VMX
	vmx_cpu_t		cpu_vmx;		/* wonderful world of virtualization */
#endif
#if CONFIG_MCA
	struct mca_state	*cpu_mca_state;		/* State at MC fault */
#endif
	struct prngContext	*cpu_prng;		/* PRNG's context */
	int			cpu_type;
	int			cpu_subtype;
	int			cpu_threadtype;
	boolean_t		cpu_iflag;
	boolean_t		cpu_boot_complete;
	int			cpu_hibernate;
	/* Size of the plrecords[] ring; defined even when the ring is compiled out. */
#define MAX_PREEMPTION_RECORDS (128)
#if DEVELOPMENT || DEBUG
	int			cpu_plri;		/* next plrecords[] slot that
							 * pltrace_internal() will fill */
	plrecord_t		plrecords[MAX_PREEMPTION_RECORDS];
#endif
} cpu_data_t;
262
/* Array of per-cpu data pointers, indexed by cpu number (see cpu_datap()). */
extern cpu_data_t *cpu_data_ptr[];
264
/*
 * Macros to generate inline bodies that access per-cpu data fields.
 *
 * Each macro expands to a complete function BODY (the GET, GET_INDEX and
 * XCHG forms contain a `return'), so it must be the only content of the
 * function that uses it and must not be wrapped in do { } while (0).
 *
 * Two implementations are provided:
 *  - clang:  dereference a null-based pointer qualified with
 *            address_space(256), which clang documents as the x86 %gs
 *            segment;
 *  - others: explicit %gs-prefixed inline assembly using
 *            offsetof(cpu_data_t, member) as the displacement.
 */
#if defined(__clang__)
#define GS_RELATIVE volatile __attribute__((address_space(256)))
#ifndef offsetof
#define offsetof(TYPE,MEMBER) __builtin_offsetof(TYPE,MEMBER)
#endif

/* Body: return (type) the current cpu's `member'. */
#define CPU_DATA_GET(member,type)					\
	cpu_data_t GS_RELATIVE *cpu_data =				\
		(cpu_data_t GS_RELATIVE *)0UL;				\
	type ret;							\
	ret = cpu_data->member;						\
	return ret;

/* Body: return (type) element `index' of the current cpu's array `member'. */
#define CPU_DATA_GET_INDEX(member,index,type)				\
	cpu_data_t GS_RELATIVE *cpu_data =				\
		(cpu_data_t GS_RELATIVE *)0UL;				\
	type ret;							\
	ret = cpu_data->member[index];					\
	return ret;

/* Body: store `value' into the current cpu's `member'. */
#define CPU_DATA_SET(member,value)					\
	cpu_data_t GS_RELATIVE *cpu_data =				\
		(cpu_data_t GS_RELATIVE *)0UL;				\
	cpu_data->member = value;

/*
 * Body: store `value' into `member' and return the previous contents.
 * NOTE(review): this variant is a separate load followed by a store,
 * whereas the non-clang variant below uses a single xchg instruction --
 * confirm callers do not rely on the exchange being atomic.
 */
#define CPU_DATA_XCHG(member,value,type)				\
	cpu_data_t GS_RELATIVE *cpu_data =				\
		(cpu_data_t GS_RELATIVE *)0UL;				\
	type ret;							\
	ret = cpu_data->member;						\
	cpu_data->member = value;					\
	return ret;

#else /* !defined(__clang__) */

#ifndef offsetof
#define offsetof(TYPE,MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif /* offsetof */
/* Body: return (type) the current cpu's `member' via a %gs-relative mov. */
#define CPU_DATA_GET(member,type)					\
	type ret;							\
	__asm__ volatile ("mov %%gs:%P1,%0"				\
		: "=r" (ret)						\
		: "i" (offsetof(cpu_data_t,member)));			\
	return ret;

/*
 * Body: return (type) element `index' of array `member'.  The offset is
 * passed in a register ("r") rather than as an immediate, so `index' need
 * not be a compile-time constant.
 */
#define CPU_DATA_GET_INDEX(member,index,type)				\
	type ret;							\
	__asm__ volatile ("mov %%gs:(%1),%0"				\
		: "=r" (ret)						\
		: "r" (offsetof(cpu_data_t,member[index])));		\
	return ret;

/* Body: store `value' into the current cpu's `member'. */
#define CPU_DATA_SET(member,value)					\
	__asm__ volatile ("mov %0,%%gs:%P1"				\
		:							\
		: "r" (value), "i" (offsetof(cpu_data_t,member)));

/*
 * Body: exchange `value' with `member' using xchg and return the previous
 * contents.  The "0" constraint places `value' in the same register as the
 * output operand.
 */
#define CPU_DATA_XCHG(member,value,type)				\
	type ret;							\
	__asm__ volatile ("xchg %0,%%gs:%P1"				\
		: "=r" (ret)						\
		: "i" (offsetof(cpu_data_t,member)), "0" (value));	\
	return ret;

#endif /* !defined(__clang__) */
331
/*
 * Everyone within the osfmk part of the kernel can use the fast
 * inline versions of these routines. Everyone outside must call
 * the real thing.
 */
337 static inline thread_t
338 get_active_thread(void)
339 {
340 CPU_DATA_GET(cpu_active_thread,thread_t)
341 }
/* current_thread() resolves to the inline %gs-relative get_active_thread(). */
#define current_thread_fast()		get_active_thread()
#define current_thread()		current_thread_fast()

/* Constant TRUE in this configuration. */
#define cpu_mode_is64bit()		TRUE
346
347 static inline int
348 get_preemption_level(void)
349 {
350 CPU_DATA_GET(cpu_preemption_level,int)
351 }
352 static inline int
353 get_interrupt_level(void)
354 {
355 CPU_DATA_GET(cpu_interrupt_level,int)
356 }
357 static inline int
358 get_cpu_number(void)
359 {
360 CPU_DATA_GET(cpu_number,int)
361 }
362 static inline int
363 get_cpu_phys_number(void)
364 {
365 CPU_DATA_GET(cpu_phys_number,int)
366 }
367
368 static inline cpu_data_t *
369 current_cpu_datap(void) {
370 CPU_DATA_GET(cpu_this, cpu_data_t *);
371 }
372
/*
 * Facility to diagnose preemption-level imbalances, which are otherwise
 * challenging to debug. On each operation that enables or disables preemption,
 * we record a backtrace into a per-CPU ring buffer, along with the current
 * preemption level and operation type. Thus, if an imbalance is observed,
 * one can examine these per-CPU records to determine which codepath failed
 * to re-enable preemption, enabled preemption without a corresponding
 * disablement, etc. The backtracer determines which stack is currently active,
 * and uses that to perform bounds checks on unterminated stacks.
 * To enable, sysctl -w machdep.pltrace=1 on DEVELOPMENT or DEBUG kernels (DRK '15).
 * The bounds check currently doesn't account for non-default thread stack sizes.
 */
385 #if DEVELOPMENT || DEBUG
/*
 * Walk the frame-pointer chain starting at this function's own frame and
 * store up to `maxframes' return addresses into rets[].
 *
 *   rets       output array with room for at least `maxframes' entries
 *   maxframes  number of entries to fill; every entry is always written
 *   stacklo    lowest address a frame pointer may have (must be non-zero)
 *   stackhi    upper bound of the stack being walked (must be non-zero)
 *
 * A frame record is two 64-bit words: the saved frame pointer at cfp[0] and
 * the return address at cfp[1].  A frame pointer is followed only when it is
 * non-NULL, not below stacklo, and the whole two-word record ends at or
 * below stackhi; the previous check accepted cfp == stackhi and then read
 * both words past the upper bound.  Once a frame pointer fails the check it
 * is left unchanged, so that entry and every remaining entry is written as
 * 0, marking where the walk stopped.
 */
static inline void pltrace_bt(uint64_t *rets, int maxframes, uint64_t stacklo, uint64_t stackhi) {
	uint64_t	*cfp = (uint64_t *) __builtin_frame_address(0);
	const uint64_t	frame_record_size = 2 * sizeof(uint64_t);
	int		plbtf;

	assert(stacklo != 0 && stackhi != 0);

	for (plbtf = 0; plbtf < maxframes; plbtf++) {
		uint64_t fp = (uint64_t)cfp;

		/*
		 * (stackhi - fp) is evaluated only after fp <= stackhi has
		 * been established, so the subtraction cannot wrap.
		 */
		if ((fp == 0) || (fp < stacklo) || (fp > stackhi) ||
		    ((stackhi - fp) < frame_record_size)) {
			rets[plbtf] = 0;
			continue;
		}
		rets[plbtf] = *(cfp + 1);
		cfp = (uint64_t *) (*cfp);
	}
}
401
402
/*
 * Statically allocated stacks that pltrace_internal() considers when working
 * out which stack the caller is running on.
 */
extern uint32_t		low_intstack[];		/* bottom */
extern uint32_t		low_eintstack[];	/* top */
extern char	mp_slave_stack[PAGE_SIZE];
406
407 static inline void pltrace_internal(boolean_t enable) {
408 cpu_data_t *cdata = current_cpu_datap();
409 int cpli = cdata->cpu_preemption_level;
410 int cplrecord = cdata->cpu_plri;
411 uint64_t kstackb, kstackt, *plbts;
412
413 assert(cpli >= 0);
414
415 cdata->plrecords[cplrecord].pltype = enable;
416 cdata->plrecords[cplrecord].plevel = cpli;
417
418 plbts = &cdata->plrecords[cplrecord].plbt[0];
419
420 cplrecord++;
421
422 if (cplrecord >= MAX_PREEMPTION_RECORDS) {
423 cplrecord = 0;
424 }
425
426 cdata->cpu_plri = cplrecord;
427 /* Obtain the 'current' program counter, initial backtrace
428 * element. This will also indicate if we were unable to
429 * trace further up the stack for some reason
430 */
431 __asm__ volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
432 : "=m" (plbts[0])
433 :
434 : "rax");
435
436
437 thread_t cplthread = cdata->cpu_active_thread;
438 if (cplthread) {
439 uintptr_t csp;
440 __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
441 /* Determine which stack we're on to populate stack bounds.
442 * We don't need to trace across stack boundaries for this
443 * routine.
444 */
445 kstackb = cdata->cpu_active_stack;
446 kstackt = kstackb + KERNEL_STACK_SIZE;
447 if (csp < kstackb || csp > kstackt) {
448 kstackt = cdata->cpu_kernel_stack;
449 kstackb = kstackb - KERNEL_STACK_SIZE;
450 if (csp < kstackb || csp > kstackt) {
451 kstackt = cdata->cpu_int_stack_top;
452 kstackb = kstackt - INTSTACK_SIZE;
453 if (csp < kstackb || csp > kstackt) {
454 kstackt = (uintptr_t)low_eintstack;
455 kstackb = (uintptr_t)low_eintstack - INTSTACK_SIZE;
456 if (csp < kstackb || csp > kstackt) {
457 kstackb = (uintptr_t) mp_slave_stack;
458 kstackt = (uintptr_t) mp_slave_stack + PAGE_SIZE;
459 }
460 }
461 }
462 }
463
464 if (kstackb) {
465 pltrace_bt(&plbts[1], MAXPLFRAMES - 1, kstackb, kstackt);
466 }
467 }
468 }
469
/* Non-zero turns on recording in pltrace(); tested there on every call. */
extern int plctrace_enabled;
471 #endif /* DEVELOPMENT || DEBUG */
472
473 static inline void pltrace(boolean_t plenable) {
474 #if DEVELOPMENT || DEBUG
475 if (__improbable(plctrace_enabled != 0)) {
476 pltrace_internal(plenable);
477 }
478 #else
479 (void)plenable;
480 #endif
481 }
482
/*
 * Disable preemption on the calling cpu by incrementing its
 * cpu_preemption_level, then log the transition with pltrace(FALSE).
 * Calls nest: each call adds one level.
 */
static inline void
disable_preemption_internal(void) {
	assert(get_preemption_level() >= 0);

#if defined(__clang__)
	/* Null-based %gs-relative pointer: member accesses hit this cpu's data. */
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	cpu_data->cpu_preemption_level++;
#else
	/*
	 * NOTE(review): unlike the decl-based asm in the enable_* routines,
	 * this statement lists no "cc"/"memory" clobbers -- confirm that is
	 * intentional.
	 */
	__asm__ volatile ("incl %%gs:%P0"
			:
			: "i" (offsetof(cpu_data_t, cpu_preemption_level)));
#endif
	/* Traced after the increment, so the record shows the new level. */
	pltrace(FALSE);
}
497
/*
 * Undo one disable_preemption_internal(): log the transition with
 * pltrace(TRUE), decrement the calling cpu's cpu_preemption_level and, when
 * the level reaches zero, call kernel_preempt_check().
 */
static inline void
enable_preemption_internal(void) {
	assert(get_preemption_level() > 0);
	/* Traced before the decrement, so the record shows the old level. */
	pltrace(TRUE);
#if defined(__clang__)
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	if (0 == --cpu_data->cpu_preemption_level)
		kernel_preempt_check();
#else
	/*
	 * decl sets ZF when the level reaches zero; jne skips the call
	 * otherwise.
	 * NOTE(review): the clobber list names only eax/ecx/edx (plus cc and
	 * memory) although the asm performs a call -- confirm that
	 * _kernel_preempt_check preserves every other register.
	 */
	__asm__ volatile ("decl %%gs:%P0 \n\t"
			"jne 1f \n\t"
			"call _kernel_preempt_check \n\t"
			"1:"
			: /* no outputs */
			: "i" (offsetof(cpu_data_t, cpu_preemption_level))
			: "eax", "ecx", "edx", "cc", "memory");
#endif
}
516
/*
 * Like enable_preemption_internal(), but never calls kernel_preempt_check():
 * logs the transition with pltrace(TRUE) and decrements the calling cpu's
 * cpu_preemption_level, nothing more.
 */
static inline void
enable_preemption_no_check(void)
{
	assert(get_preemption_level() > 0);

	/* Traced before the decrement, so the record shows the old level. */
	pltrace(TRUE);
#if defined(__clang__)
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	cpu_data->cpu_preemption_level--;
#else
	__asm__ volatile ("decl %%gs:%P0"
			: /* no outputs */
			: "i" (offsetof(cpu_data_t, cpu_preemption_level))
			: "cc", "memory");
#endif
}
533
/* Underscore-prefixed alias: forwards to enable_preemption_no_check(). */
static inline void _enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}
538
/* mp_ alias: forwards to disable_preemption_internal(). */
static inline void mp_disable_preemption(void)
{
	disable_preemption_internal();
}
544
/* Underscore-prefixed mp_ alias: forwards to disable_preemption_internal(). */
static inline void _mp_disable_preemption(void)
{
	disable_preemption_internal();
}
550
/* mp_ alias: forwards to enable_preemption_internal(). */
static inline void mp_enable_preemption(void)
{
	enable_preemption_internal();
}
556
/* Underscore-prefixed mp_ alias: forwards to enable_preemption_internal(). */
static inline void _mp_enable_preemption(void)
{
	enable_preemption_internal();
}
561
/* mp_ alias: forwards to enable_preemption_no_check(). */
static inline void mp_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}
566
/* Underscore-prefixed mp_ alias: forwards to enable_preemption_no_check(). */
static inline void _mp_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}
571
#ifdef XNU_KERNEL_PRIVATE
/* Map the generic preemption entry points onto the inline implementations. */
#define disable_preemption() disable_preemption_internal()
#define enable_preemption() enable_preemption_internal()
/*
 * NOTE(review): presumably tells machine-independent code that this header
 * supplies the two macros above -- confirm against its consumers.
 */
#define MACHINE_PREEMPTION_MACROS (1)
#endif
577
578
579 static inline cpu_data_t *
580 cpu_datap(int cpu) {
581 return cpu_data_ptr[cpu];
582 }
583
/*
 * Defined elsewhere.
 * NOTE(review): going by the names, cpu_data_alloc() obtains a cpu_data_t
 * (is_boot_cpu selecting the boot processor's instance) and
 * cpu_data_realloc() relocates existing per-cpu data -- confirm against
 * the definitions.
 */
extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu);
extern void cpu_data_realloc(void);
586
587 #endif /* I386_CPU_DATA */