/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 *
 */

#ifndef I386_CPU_DATA
#define I386_CPU_DATA

#include <mach_assert.h>
#include <machine/atomic.h>

#include <kern/assert.h>
#include <kern/kern_types.h>
#include <kern/mpqueue.h>
#include <kern/queue.h>
#include <kern/processor.h>
#include <kern/pms.h>
#include <pexpert/pexpert.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/vm_param.h>
#include <i386/locks.h>
#include <i386/rtclock_protos.h>
#include <i386/pmCPU.h>
#include <i386/cpu_topology.h>
#include <i386/seg.h>
#include <i386/mp.h>

#if CONFIG_VMX
#include <i386/vmx/vmx_cpu.h>
#endif

#if MONOTONIC
#include <machine/monotonic.h>
#endif /* MONOTONIC */

#include <machine/pal_routines.h>

/*
 * Data structures referenced (anonymously) from per-cpu data:
 */
struct cpu_cons_buffer;
struct cpu_desc_table;
struct mca_state;
struct prngContext;

/*
 * Data structures embedded in per-cpu data:
 */
typedef struct rtclock_timer {
        mpqueue_head_t          queue;
        uint64_t                deadline;
        uint64_t                when_set;
        boolean_t               has_expired;
} rtclock_timer_t;

typedef struct {
        /* The 'u' suffixed fields store the double-mapped descriptor addresses */
        struct x86_64_tss       *cdi_ktssu;
        struct x86_64_tss       *cdi_ktssb;
        x86_64_desc_register_t  cdi_gdtu;
        x86_64_desc_register_t  cdi_gdtb;
        x86_64_desc_register_t  cdi_idtu;
        x86_64_desc_register_t  cdi_idtb;
        struct real_descriptor  *cdi_ldtu;
        struct real_descriptor  *cdi_ldtb;
        vm_offset_t             cdi_sstku;
        vm_offset_t             cdi_sstkb;
} cpu_desc_index_t;

typedef enum {
        TASK_MAP_32BIT,         /* 32-bit user, compatibility mode */
        TASK_MAP_64BIT,         /* 64-bit user thread, shared space */
} task_map_t;


/*
 * This structure is used on entry into the (uber-)kernel on syscall from
 * a 64-bit user. It contains the address of the machine state save area
 * for the current thread and a temporary place to save the user's rsp
 * before loading this address into rsp.
 */
typedef struct {
        addr64_t        cu_isf;         /* thread->pcb->iss.isf */
        uint64_t        cu_tmp;         /* temporary scratch */
        addr64_t        cu_user_gs_base;
} cpu_uber_t;
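
/*
 * Illustrative sketch (not part of this header): the 64-bit syscall entry
 * stub uses these fields roughly as the comment above describes. The
 * CPU_UBER_* offsets are assumed to be generated elsewhere (genassym-style);
 * the names here are for illustration only.
 *
 *	swapgs					// %gs now bases this CPU's data
 *	mov	%rsp, %gs:CPU_UBER_TMP		// stash user rsp in cu_tmp
 *	mov	%gs:CPU_UBER_ISF, %rsp		// switch rsp to the save area
 *	...					// build the interrupt state frame
 */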

typedef uint16_t        pcid_t;
typedef uint8_t         pcid_ref_t;

#define CPU_RTIME_BINS (12)
#define CPU_ITIME_BINS (CPU_RTIME_BINS)

#define MAX_TRACE_BTFRAMES (16)
typedef struct {
        boolean_t pltype;
        int       plevel;
        uint64_t  plbt[MAX_TRACE_BTFRAMES];
} plrecord_t;

#if DEVELOPMENT || DEBUG
typedef enum {
        IOTRACE_PHYS_READ = 1,
        IOTRACE_PHYS_WRITE,
        IOTRACE_IO_READ,
        IOTRACE_IO_WRITE,
        IOTRACE_PORTIO_READ,
        IOTRACE_PORTIO_WRITE
} iotrace_type_e;

typedef struct {
        iotrace_type_e  iotype;
        int             size;
        uint64_t        vaddr;
        uint64_t        paddr;
        uint64_t        val;
        uint64_t        start_time_abs;
        uint64_t        duration;
        uint64_t        backtrace[MAX_TRACE_BTFRAMES];
} iotrace_entry_t;

typedef struct {
        int             vector;         /* Vector number of interrupt */
        thread_t        curthread;      /* Current thread at the time of the interrupt */
        uint64_t        interrupted_pc;
        int             curpl;          /* Current preemption level */
        int             curil;          /* Current interrupt level */
        uint64_t        start_time_abs;
        uint64_t        duration;
        uint64_t        backtrace[MAX_TRACE_BTFRAMES];
} traptrace_entry_t;

#define DEFAULT_IOTRACE_ENTRIES_PER_CPU (64)
#define IOTRACE_MAX_ENTRIES_PER_CPU (256)
extern volatile int mmiotrace_enabled;
extern int iotrace_generators;
extern int iotrace_entries_per_cpu;
extern int *iotrace_next;
extern iotrace_entry_t **iotrace_ring;

#define TRAPTRACE_INVALID_INDEX (~0U)
#define DEFAULT_TRAPTRACE_ENTRIES_PER_CPU (16)
#define TRAPTRACE_MAX_ENTRIES_PER_CPU (256)
extern volatile int traptrace_enabled;
extern int traptrace_generators;
extern int traptrace_entries_per_cpu;
extern int *traptrace_next;
extern traptrace_entry_t **traptrace_ring;
#endif /* DEVELOPMENT || DEBUG */

/*
 * Per-cpu data.
 *
 * Each processor has a per-cpu data area which is dereferenced through the
 * current_cpu_datap() macro. For speed, the %gs segment is based here, and
 * using this, inlines provide single-instruction access to frequently used
 * members - such as get_cpu_number()/cpu_number(), and get_active_thread()/
 * current_thread().
 *
 * Cpu data owned by another processor can be accessed using the
 * cpu_datap(cpu_number) macro which uses the cpu_data_ptr[] array of per-cpu
 * pointers.
 */
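/*
 * For example (illustrative only, not part of the original header): a
 * client can read its own per-cpu data with one %gs-relative load, or
 * reach another processor's data through the pointer array:
 *
 *	int mycpu = get_cpu_number();            // single mov %gs:..., fast
 *	cpu_data_t *mine  = current_cpu_datap(); // this CPU's area
 *	cpu_data_t *other = cpu_datap(7);        // i.e. cpu_data_ptr[7]
 */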
typedef struct {
        pcid_t                  cpu_pcid_free_hint;
#define PMAP_PCID_MAX_PCID      (0x800)
        pcid_ref_t              cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];
        pmap_t                  cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];
} pcid_cdata_t;

typedef struct cpu_data {
        struct pal_cpu_data     cpu_pal_data;           /* PAL-specific data */
#define cpu_pd cpu_pal_data                             /* convenience alias */
        struct cpu_data         *cpu_this;              /* pointer to myself */
        thread_t                cpu_active_thread;
        thread_t                cpu_nthread;
        int                     cpu_number;             /* Logical CPU */
        void                    *cpu_int_state;         /* interrupt state */
        vm_offset_t             cpu_active_stack;       /* kernel stack base */
        vm_offset_t             cpu_kernel_stack;       /* kernel stack top */
        vm_offset_t             cpu_int_stack_top;
        volatile int            cpu_signals;            /* IPI events */
        volatile int            cpu_prior_signals;      /* Last set of events,
                                                         * debugging
                                                         */
        ast_t                   cpu_pending_ast;
        /*
         * Note if rearranging fields:
         * We want cpu_preemption_level on a different
         * cache line than cpu_active_thread
         * for optimizing mtx_spin phase.
         */
        int                     cpu_interrupt_level;
        volatile int            cpu_preemption_level;
        volatile int            cpu_running;
#if !MONOTONIC
        boolean_t               cpu_fixed_pmcs_enabled;
#endif /* !MONOTONIC */
        rtclock_timer_t         rtclock_timer;
        uint64_t                quantum_timer_deadline;
        volatile addr64_t       cpu_active_cr3 __attribute((aligned(64)));
        union {
                volatile uint32_t cpu_tlb_invalid;
                struct {
                        volatile uint16_t cpu_tlb_invalid_local;
                        volatile uint16_t cpu_tlb_invalid_global;
                };
        };
        uint64_t                cpu_ip_desc[2];
        volatile task_map_t     cpu_task_map;
        volatile addr64_t       cpu_task_cr3;
        addr64_t                cpu_kernel_cr3;
        volatile addr64_t       cpu_ucr3;
        volatile addr64_t       cpu_shadowtask_cr3;
        boolean_t               cpu_pagezero_mapped;
        cpu_uber_t              cpu_uber;
        /* Double-mapped per-CPU exception stack address */
        uintptr_t               cd_estack;
        int                     cpu_xstate;
        int                     cpu_curtask_has_ldt;
        int                     cpu_curthread_do_segchk;
        /* Address of shadowed, partially mirrored CPU data structures located
         * in the double mapped PML4.
         */
        void                    *cd_shadow;
        union {
                volatile uint32_t cpu_tlb_invalid_count;
                struct {
                        volatile uint16_t cpu_tlb_invalid_local_count;
                        volatile uint16_t cpu_tlb_invalid_global_count;
                };
        };

        uint16_t                cpu_tlb_gen_counts_local[MAX_CPUS];
        uint16_t                cpu_tlb_gen_counts_global[MAX_CPUS];

        struct processor        *cpu_processor;
#if NCOPY_WINDOWS > 0
        struct cpu_pmap         *cpu_pmap;
#endif
        struct real_descriptor  *cpu_ldtp;
        struct cpu_desc_table   *cpu_desc_tablep;
        cpu_desc_index_t        cpu_desc_index;
        int                     cpu_ldt;
#if NCOPY_WINDOWS > 0
        vm_offset_t             cpu_copywindow_base;
        uint64_t                *cpu_copywindow_pdp;

        vm_offset_t             cpu_physwindow_base;
        uint64_t                *cpu_physwindow_ptep;
#endif

#define HWINTCNT_SIZE 256
        uint32_t                cpu_hwIntCnt[HWINTCNT_SIZE];    /* Interrupt counts */
        uint64_t                cpu_hwIntpexits[HWINTCNT_SIZE];
        uint64_t                cpu_dr7;                /* debug control register */
        uint64_t                cpu_int_event_time;     /* intr entry/exit time */
        pal_rtc_nanotime_t      *cpu_nanotime;          /* Nanotime info */
#if KPC
        /* double-buffered performance counter data */
        uint64_t                *cpu_kpc_buf[2];
        /* PMC shadow and reload value buffers */
        uint64_t                *cpu_kpc_shadow;
        uint64_t                *cpu_kpc_reload;
#endif
#if MONOTONIC
        struct mt_cpu           cpu_monotonic;
#endif /* MONOTONIC */
        uint32_t                cpu_pmap_pcid_enabled;
        pcid_t                  cpu_active_pcid;
        pcid_t                  cpu_last_pcid;
        pcid_t                  cpu_kernel_pcid;
        volatile pcid_ref_t     *cpu_pmap_pcid_coherentp;
        volatile pcid_ref_t     *cpu_pmap_pcid_coherentp_kernel;
        pcid_cdata_t            *cpu_pcid_data;
#ifdef PCID_STATS
        uint64_t                cpu_pmap_pcid_flushes;
        uint64_t                cpu_pmap_pcid_preserves;
#endif
        uint64_t                cpu_aperf;
        uint64_t                cpu_mperf;
        uint64_t                cpu_c3res;
        uint64_t                cpu_c6res;
        uint64_t                cpu_c7res;
        uint64_t                cpu_itime_total;
        uint64_t                cpu_rtime_total;
        uint64_t                cpu_ixtime;
        uint64_t                cpu_idle_exits;
        uint64_t                cpu_rtimes[CPU_RTIME_BINS];
        uint64_t                cpu_itimes[CPU_ITIME_BINS];
#if !MONOTONIC
        uint64_t                cpu_cur_insns;
        uint64_t                cpu_cur_ucc;
        uint64_t                cpu_cur_urc;
#endif /* !MONOTONIC */
        uint64_t                cpu_gpmcs[4];
        uint64_t                cpu_max_observed_int_latency;
        int                     cpu_max_observed_int_latency_vector;
        volatile boolean_t      cpu_NMI_acknowledged;
        uint64_t                debugger_entry_time;
        uint64_t                debugger_ipi_time;
        /* A separate nested interrupt stack flag, to account
         * for non-nested interrupts arriving while on the interrupt stack.
         * Currently only occurs when AICPM enables interrupts on the
         * interrupt stack during processor offlining.
         */
        uint32_t                cpu_nested_istack;
        uint32_t                cpu_nested_istack_events;
        x86_saved_state64_t     *cpu_fatal_trap_state;
        x86_saved_state64_t     *cpu_post_fatal_trap_state;
#if CONFIG_VMX
        vmx_cpu_t               cpu_vmx;                /* wonderful world of virtualization */
#endif
#if CONFIG_MCA
        struct mca_state        *cpu_mca_state;         /* State at MC fault */
#endif
        int                     cpu_type;
        int                     cpu_subtype;
        int                     cpu_threadtype;
        boolean_t               cpu_iflag;
        boolean_t               cpu_boot_complete;
        int                     cpu_hibernate;
#define MAX_PREEMPTION_RECORDS (8)
#if DEVELOPMENT || DEBUG
        int                     cpu_plri;
        plrecord_t              plrecords[MAX_PREEMPTION_RECORDS];
#endif
        void                    *cpu_console_buf;
        struct x86_lcpu         lcpu;
        int                     cpu_phys_number;        /* Physical CPU */
        cpu_id_t                cpu_id;                 /* Platform Expert */
#if DEBUG
        uint64_t                cpu_entry_cr3;
        uint64_t                cpu_exit_cr3;
        uint64_t                cpu_pcid_last_cr3;
#endif
        boolean_t               cpu_rendezvous_in_progress;
} cpu_data_t;

extern cpu_data_t *cpu_data_ptr[];

/* Macro to generate inline bodies to retrieve per-cpu data fields. */
#if defined(__clang__)
#define GS_RELATIVE volatile __attribute__((address_space(256)))
#ifndef offsetof
#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE,MEMBER)
#endif

#define CPU_DATA_GET(member, type)                              \
	cpu_data_t GS_RELATIVE *cpu_data =                      \
	        (cpu_data_t GS_RELATIVE *)0UL;                  \
	type ret;                                               \
	ret = cpu_data->member;                                 \
	return ret;

#define CPU_DATA_GET_INDEX(member, index, type)                 \
	cpu_data_t GS_RELATIVE *cpu_data =                      \
	        (cpu_data_t GS_RELATIVE *)0UL;                  \
	type ret;                                               \
	ret = cpu_data->member[index];                          \
	return ret;

#define CPU_DATA_SET(member, value)                             \
	cpu_data_t GS_RELATIVE *cpu_data =                      \
	        (cpu_data_t GS_RELATIVE *)0UL;                  \
	cpu_data->member = value;

#define CPU_DATA_XCHG(member, value, type)                      \
	cpu_data_t GS_RELATIVE *cpu_data =                      \
	        (cpu_data_t GS_RELATIVE *)0UL;                  \
	type ret;                                               \
	ret = cpu_data->member;                                 \
	cpu_data->member = value;                               \
	return ret;

#else /* !defined(__clang__) */

#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif /* offsetof */
#define CPU_DATA_GET(member, type)                              \
	type ret;                                               \
	__asm__ volatile ("mov %%gs:%P1,%0"                     \
	        : "=r" (ret)                                    \
	        : "i" (offsetof(cpu_data_t,member)));           \
	return ret;

#define CPU_DATA_GET_INDEX(member, index, type)                 \
	type ret;                                               \
	__asm__ volatile ("mov %%gs:(%1),%0"                    \
	        : "=r" (ret)                                    \
	        : "r" (offsetof(cpu_data_t,member[index])));    \
	return ret;

#define CPU_DATA_SET(member, value)                             \
	__asm__ volatile ("mov %0,%%gs:%P1"                     \
	        :                                               \
	        : "r" (value), "i" (offsetof(cpu_data_t,member)));

#define CPU_DATA_XCHG(member, value, type)                      \
	type ret;                                               \
	__asm__ volatile ("xchg %0,%%gs:%P1"                    \
	        : "=r" (ret)                                    \
	        : "i" (offsetof(cpu_data_t,member)), "0" (value)); \
	return ret;

#endif /* !defined(__clang__) */

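/*
 * Example (illustrative, not in the original header): CPU_DATA_GET supplies
 * the entire body of a %gs-relative accessor, so an inline for any
 * cpu_data_t member follows the same shape as the real accessors defined
 * below, e.g.
 *
 *	static inline int
 *	get_cpu_hibernate(void)
 *	{
 *		CPU_DATA_GET(cpu_hibernate, int)
 *	}
 *
 * which compiles to a single mov from %gs at the member's offset.
 * (cpu_hibernate is a real member; this accessor name is hypothetical.)
 */
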
/*
 * Everyone within the osfmk part of the kernel can use the fast
 * inline versions of these routines. Everyone outside must call
 * the real thing.
 */


/*
 * The "volatile" flavor of current_thread() is intended for use by
 * scheduler code which may need to update the thread pointer in the
 * course of a context switch. Any call to current_thread() made
 * prior to the thread pointer update should be safe to optimize away
 * as it should be consistent with that thread's state to the extent
 * the compiler can reason about it. Likewise, the context switch
 * path will eventually result in an arbitrary branch to the new
 * thread's pc, about which the compiler won't be able to reason.
 * Thus any compile-time optimization of current_thread() calls made
 * within the new thread should be safely encapsulated in its
 * register/stack state. The volatile form therefore exists to cover
 * the window between the thread pointer update and the branch to
 * the new pc.
 */
static inline thread_t
get_active_thread_volatile(void)
{
	CPU_DATA_GET(cpu_active_thread, thread_t)
}

static inline __attribute__((const)) thread_t
get_active_thread(void)
{
	CPU_DATA_GET(cpu_active_thread, thread_t)
}

#define current_thread_fast()           get_active_thread()
#define current_thread_volatile()       get_active_thread_volatile()
#define current_thread()                current_thread_fast()

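/*
 * Sketch of the window described above (an assumed shape of the switch
 * path, not code from this file): after the scheduler stores the new
 * thread pointer, re-reads must use the volatile flavor so the compiler
 * cannot reuse a value cached from before the store:
 *
 *	current_cpu_datap()->cpu_active_thread = new_thread;
 *	...
 *	thread_t self = current_thread_volatile(); // reloads cpu_active_thread
 */
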
#define cpu_mode_is64bit()              TRUE

static inline int
get_preemption_level(void)
{
	CPU_DATA_GET(cpu_preemption_level, int)
}
static inline int
get_interrupt_level(void)
{
	CPU_DATA_GET(cpu_interrupt_level, int)
}
static inline int
get_cpu_number(void)
{
	CPU_DATA_GET(cpu_number, int)
}
static inline int
get_cpu_phys_number(void)
{
	CPU_DATA_GET(cpu_phys_number, int)
}

static inline cpu_data_t *
current_cpu_datap(void)
{
	CPU_DATA_GET(cpu_this, cpu_data_t *);
}

/*
 * Facility to diagnose preemption-level imbalances, which are otherwise
 * challenging to debug. On each operation that enables or disables preemption,
 * we record a backtrace into a per-CPU ring buffer, along with the current
 * preemption level and operation type. Thus, if an imbalance is observed,
 * one can examine these per-CPU records to determine which codepath failed
 * to re-enable preemption, enabled preemption without a corresponding
 * disablement, etc. The backtracer determines which stack is currently active,
 * and uses that to perform bounds checks on unterminated stacks.
 * To enable, sysctl -w machdep.pltrace=1 on DEVELOPMENT or DEBUG kernels (DRK '15).
 * The bounds check currently doesn't account for non-default thread stack sizes.
 */
#if DEVELOPMENT || DEBUG
static inline void
rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata, uint64_t frameptr, bool use_cursp)
{
	extern uint32_t low_intstack[];         /* bottom */
	extern uint32_t low_eintstack[];        /* top */
	extern char     mp_slave_stack[];
	int             btidx = 0;

	uint64_t kstackb, kstackt;

	/* Obtain the 'current' program counter, initial backtrace
	 * element. This will also indicate if we were unable to
	 * trace further up the stack for some reason.
	 */
	if (use_cursp) {
		__asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
		    : "=m" (rets[btidx++])
		    :
		    : "rax");
	}

	thread_t cplthread = cdata->cpu_active_thread;
	if (cplthread) {
		uintptr_t csp;
		if (use_cursp == true) {
			__asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
		} else {
			csp = frameptr;
		}
		/* Determine which stack we're on to populate stack bounds.
		 * We don't need to trace across stack boundaries for this
		 * routine.
		 */
		kstackb = cdata->cpu_active_stack;
		kstackt = kstackb + KERNEL_STACK_SIZE;
		if (csp < kstackb || csp > kstackt) {
			kstackt = cdata->cpu_kernel_stack;
			kstackb = kstackt - KERNEL_STACK_SIZE;
			if (csp < kstackb || csp > kstackt) {
				kstackt = cdata->cpu_int_stack_top;
				kstackb = kstackt - INTSTACK_SIZE;
				if (csp < kstackb || csp > kstackt) {
					kstackt = (uintptr_t)low_eintstack;
					kstackb = kstackt - INTSTACK_SIZE;
					if (csp < kstackb || csp > kstackt) {
						kstackb = (uintptr_t) mp_slave_stack;
						kstackt = kstackb + PAGE_SIZE;
					} else {
						kstackb = 0;
						kstackt = 0;
					}
				}
			}
		}

		if (__probable(kstackb && kstackt)) {
			uint64_t *cfp = (uint64_t *) frameptr;
			int rbbtf;

			for (rbbtf = btidx; rbbtf < maxframes; rbbtf++) {
				if (((uint64_t)cfp == 0) || (((uint64_t)cfp < kstackb) || ((uint64_t)cfp > kstackt))) {
					rets[rbbtf] = 0;
					continue;
				}
				rets[rbbtf] = *(cfp + 1);
				cfp = (uint64_t *) (*cfp);
			}
		}
	}
}

static inline void
pltrace_internal(boolean_t enable)
{
	cpu_data_t *cdata = current_cpu_datap();
	int cpli = cdata->cpu_preemption_level;
	int cplrecord = cdata->cpu_plri;
	uint64_t *plbts;

	assert(cpli >= 0);

	cdata->plrecords[cplrecord].pltype = enable;
	cdata->plrecords[cplrecord].plevel = cpli;

	plbts = &cdata->plrecords[cplrecord].plbt[0];

	cplrecord++;

	if (cplrecord >= MAX_PREEMPTION_RECORDS) {
		cplrecord = 0;
	}

	cdata->cpu_plri = cplrecord;

	rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
}

extern int plctrace_enabled;

static inline void
iotrace(iotrace_type_e type, uint64_t vaddr, uint64_t paddr, int size, uint64_t val,
    uint64_t sabs, uint64_t duration)
{
	cpu_data_t *cdata;
	int cpu_num, nextidx;
	iotrace_entry_t *cur_iotrace_ring;

	if (__improbable(mmiotrace_enabled == 0 || iotrace_generators == 0)) {
		return;
	}

	cdata = current_cpu_datap();
	cpu_num = cdata->cpu_number;
	nextidx = iotrace_next[cpu_num];
	cur_iotrace_ring = iotrace_ring[cpu_num];

	cur_iotrace_ring[nextidx].iotype = type;
	cur_iotrace_ring[nextidx].vaddr = vaddr;
	cur_iotrace_ring[nextidx].paddr = paddr;
	cur_iotrace_ring[nextidx].size = size;
	cur_iotrace_ring[nextidx].val = val;
	cur_iotrace_ring[nextidx].start_time_abs = sabs;
	cur_iotrace_ring[nextidx].duration = duration;

	iotrace_next[cpu_num] = ((nextidx + 1) >= iotrace_entries_per_cpu) ? 0 : (nextidx + 1);

	rbtrace_bt(&cur_iotrace_ring[nextidx].backtrace[0],
	    MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
}
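
/*
 * Illustrative call site (an assumption about how the MMIO paths use this;
 * the actual callers live in the physical/IO access routines): time the
 * access, then log it with its addresses and observed value:
 *
 *	uint64_t sabs = mach_absolute_time();
 *	uint32_t v = *(volatile uint32_t *)vaddr;
 *	iotrace(IOTRACE_IO_READ, vaddr, paddr, sizeof(v), v,
 *	    sabs, mach_absolute_time() - sabs);
 */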

static inline uint32_t
traptrace_start(int vecnum, uint64_t ipc, uint64_t sabs, uint64_t frameptr)
{
	cpu_data_t *cdata;
	int cpu_num, nextidx;
	traptrace_entry_t *cur_traptrace_ring;

	if (__improbable(traptrace_enabled == 0 || traptrace_generators == 0)) {
		return TRAPTRACE_INVALID_INDEX;
	}

	assert(ml_get_interrupts_enabled() == FALSE);
	cdata = current_cpu_datap();
	cpu_num = cdata->cpu_number;
	nextidx = traptrace_next[cpu_num];
	/* prevent nested interrupts from clobbering this record */
	traptrace_next[cpu_num] = ((nextidx + 1) >= traptrace_entries_per_cpu) ? 0 : (nextidx + 1);

	cur_traptrace_ring = traptrace_ring[cpu_num];

	cur_traptrace_ring[nextidx].vector = vecnum;
	cur_traptrace_ring[nextidx].curthread = current_thread();
	cur_traptrace_ring[nextidx].interrupted_pc = ipc;
	cur_traptrace_ring[nextidx].curpl = cdata->cpu_preemption_level;
	cur_traptrace_ring[nextidx].curil = cdata->cpu_interrupt_level;
	cur_traptrace_ring[nextidx].start_time_abs = sabs;
	cur_traptrace_ring[nextidx].duration = ~0ULL;

	rbtrace_bt(&cur_traptrace_ring[nextidx].backtrace[0],
	    MAX_TRACE_BTFRAMES - 1, cdata, frameptr, false);

	assert(nextidx <= 0xFFFF);

	return ((unsigned)cpu_num << 16) | nextidx;
}

static inline void
traptrace_end(uint32_t index, uint64_t eabs)
{
	if (index != TRAPTRACE_INVALID_INDEX) {
		traptrace_entry_t *ttentp = &traptrace_ring[index >> 16][index & 0xFFFF];

		ttentp->duration = eabs - ttentp->start_time_abs;
	}
}
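
/*
 * Illustrative pairing (an assumption about the trap path; names are
 * hypothetical): the handler brackets its work with start/end, threading
 * the packed (cpu << 16 | index) cookie through so the duration lands in
 * the right ring slot:
 *
 *	uint32_t ttidx = traptrace_start(vecnum, saved_state->isf.rip,
 *	    mach_absolute_time(), saved_state->rbp);
 *	handle_interrupt(...);
 *	traptrace_end(ttidx, mach_absolute_time());
 */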

#endif /* DEVELOPMENT || DEBUG */

static inline void
pltrace(boolean_t plenable)
{
#if DEVELOPMENT || DEBUG
	if (__improbable(plctrace_enabled != 0)) {
		pltrace_internal(plenable);
	}
#else
	(void)plenable;
#endif
}

static inline void
disable_preemption_internal(void)
{
	assert(get_preemption_level() >= 0);

	os_compiler_barrier(release);
#if defined(__clang__)
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	cpu_data->cpu_preemption_level++;
#else
	__asm__ volatile ("incl %%gs:%P0"
	    :
	    : "i" (offsetof(cpu_data_t, cpu_preemption_level)));
#endif
	os_compiler_barrier(acquire);
	pltrace(FALSE);
}

static inline void
enable_preemption_internal(void)
{
	assert(get_preemption_level() > 0);
	pltrace(TRUE);
	os_compiler_barrier(release);
#if defined(__clang__)
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	if (0 == --cpu_data->cpu_preemption_level) {
		kernel_preempt_check();
	}
#else
	__asm__ volatile ("decl %%gs:%P0 \n\t"
	    "jne 1f \n\t"
	    "call _kernel_preempt_check \n\t"
	    "1:"
	    : /* no outputs */
	    : "i" (offsetof(cpu_data_t, cpu_preemption_level))
	    : "eax", "ecx", "edx", "cc", "memory");
#endif
	os_compiler_barrier(acquire);
}

static inline void
enable_preemption_no_check(void)
{
	assert(get_preemption_level() > 0);

	pltrace(TRUE);
	os_compiler_barrier(release);
#if defined(__clang__)
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	cpu_data->cpu_preemption_level--;
#else
	__asm__ volatile ("decl %%gs:%P0"
	    : /* no outputs */
	    : "i" (offsetof(cpu_data_t, cpu_preemption_level))
	    : "cc", "memory");
#endif
	os_compiler_barrier(acquire);
}

static inline void
_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}

static inline void
mp_disable_preemption(void)
{
	disable_preemption_internal();
}

static inline void
_mp_disable_preemption(void)
{
	disable_preemption_internal();
}

static inline void
mp_enable_preemption(void)
{
	enable_preemption_internal();
}

static inline void
_mp_enable_preemption(void)
{
	enable_preemption_internal();
}

static inline void
mp_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}

static inline void
_mp_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}

#ifdef XNU_KERNEL_PRIVATE
#define disable_preemption() disable_preemption_internal()
#define enable_preemption() enable_preemption_internal()
#define MACHINE_PREEMPTION_MACROS (1)
#endif
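
/*
 * Typical usage (illustrative only): pin the thread to its current CPU
 * while it manipulates per-cpu state, then re-enable preemption (which may
 * preempt immediately if an AST became pending):
 *
 *	disable_preemption();
 *	current_cpu_datap()->cpu_hwIntCnt[vector]++;  // any per-cpu field
 *	enable_preemption();
 */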

static inline cpu_data_t *
cpu_datap(int cpu)
{
	return cpu_data_ptr[cpu];
}

static inline int
cpu_is_running(int cpu)
{
	return (cpu_datap(cpu) != NULL) && (cpu_datap(cpu)->cpu_running);
}
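
/*
 * Example (illustrative): summing a per-cpu counter across all slots,
 * skipping CPUs that are absent or offline. "real_ncpus" is the global
 * CPU count declared elsewhere in osfmk (assumed visible to callers).
 *
 *	uint64_t total = 0;
 *	for (int cpu = 0; cpu < (int)real_ncpus; cpu++) {
 *		if (!cpu_is_running(cpu)) {
 *			continue;
 *		}
 *		total += cpu_datap(cpu)->cpu_idle_exits;
 *	}
 */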

#ifdef MACH_KERNEL_PRIVATE
static inline cpu_data_t *
cpu_shadowp(int cpu)
{
	return cpu_data_ptr[cpu]->cd_shadow;
}

#endif
extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu);
extern void cpu_data_realloc(void);

#endif /* I386_CPU_DATA */