/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */

#ifndef I386_CPU_DATA
#define I386_CPU_DATA

#include <mach_assert.h>
#include <machine/atomic.h>

#include <kern/assert.h>
#include <kern/kern_types.h>
#include <kern/mpqueue.h>
#include <kern/queue.h>
#include <kern/processor.h>
#include <kern/pms.h>
#include <pexpert/pexpert.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/vm_param.h>
#include <i386/locks.h>
#include <i386/rtclock_protos.h>
#include <i386/pmCPU.h>
#include <i386/cpu_topology.h>
#include <i386/seg.h>
#include <i386/mp.h>

#if CONFIG_VMX
#include <i386/vmx/vmx_cpu.h>
#endif

#if MONOTONIC
#include <machine/monotonic.h>
#endif /* MONOTONIC */

#include <machine/pal_routines.h>

/*
 * Data structures referenced (anonymously) from per-cpu data:
 */
struct cpu_cons_buffer;
struct cpu_desc_table;
struct mca_state;
struct prngContext;

/*
 * Data structures embedded in per-cpu data:
 */
typedef struct rtclock_timer {
	mpqueue_head_t          queue;
	uint64_t                deadline;
	uint64_t                when_set;
	boolean_t               has_expired;
} rtclock_timer_t;

typedef struct {
	/* The 'u' suffixed fields store the double-mapped descriptor addresses */
	struct x86_64_tss       *cdi_ktssu;
	struct x86_64_tss       *cdi_ktssb;
	x86_64_desc_register_t  cdi_gdtu;
	x86_64_desc_register_t  cdi_gdtb;
	x86_64_desc_register_t  cdi_idtu;
	x86_64_desc_register_t  cdi_idtb;
	struct real_descriptor  *cdi_ldtu;
	struct real_descriptor  *cdi_ldtb;
	vm_offset_t             cdi_sstku;
	vm_offset_t             cdi_sstkb;
} cpu_desc_index_t;

typedef enum {
	TASK_MAP_32BIT,         /* 32-bit user, compatibility mode */
	TASK_MAP_64BIT,         /* 64-bit user thread, shared space */
} task_map_t;


/*
 * This structure is used on entry into the (uber-)kernel on syscall from
 * a 64-bit user. It contains the address of the machine state save area
 * for the current thread and a temporary place to save the user's rsp
 * before loading this address into rsp.
 */
typedef struct {
	addr64_t        cu_isf;         /* thread->pcb->iss.isf */
	uint64_t        cu_tmp;         /* temporary scratch */
	addr64_t        cu_user_gs_base;
} cpu_uber_t;

typedef uint16_t        pcid_t;
typedef uint8_t         pcid_ref_t;

#define CPU_RTIME_BINS (12)
#define CPU_ITIME_BINS (CPU_RTIME_BINS)

#define MAX_TRACE_BTFRAMES (16)
typedef struct {
	boolean_t pltype;
	int       plevel;
	uint64_t  plbt[MAX_TRACE_BTFRAMES];
} plrecord_t;

#if DEVELOPMENT || DEBUG
typedef enum {
	IOTRACE_PHYS_READ = 1,
	IOTRACE_PHYS_WRITE,
	IOTRACE_IO_READ,
	IOTRACE_IO_WRITE,
	IOTRACE_PORTIO_READ,
	IOTRACE_PORTIO_WRITE
} iotrace_type_e;

typedef struct {
	iotrace_type_e  iotype;
	int             size;
	uint64_t        vaddr;
	uint64_t        paddr;
	uint64_t        val;
	uint64_t        start_time_abs;
	uint64_t        duration;
	uint64_t        backtrace[MAX_TRACE_BTFRAMES];
} iotrace_entry_t;

typedef struct {
	int             vector;         /* Vector number of interrupt */
	thread_t        curthread;      /* Current thread at the time of the interrupt */
	uint64_t        interrupted_pc;
	int             curpl;          /* Current preemption level */
	int             curil;          /* Current interrupt level */
	uint64_t        start_time_abs;
	uint64_t        duration;
	uint64_t        backtrace[MAX_TRACE_BTFRAMES];
} traptrace_entry_t;

#define DEFAULT_IOTRACE_ENTRIES_PER_CPU (64)
#define IOTRACE_MAX_ENTRIES_PER_CPU (256)
extern volatile int mmiotrace_enabled;
extern int iotrace_generators;
extern int iotrace_entries_per_cpu;
extern int *iotrace_next;
extern iotrace_entry_t **iotrace_ring;

#define TRAPTRACE_INVALID_INDEX (~0U)
#define DEFAULT_TRAPTRACE_ENTRIES_PER_CPU (16)
#define TRAPTRACE_MAX_ENTRIES_PER_CPU (256)
extern volatile int traptrace_enabled;
extern int traptrace_generators;
extern int traptrace_entries_per_cpu;
extern int *traptrace_next;
extern traptrace_entry_t **traptrace_ring;
#endif /* DEVELOPMENT || DEBUG */

/*
 * Per-cpu data.
 *
 * Each processor has a per-cpu data area which is dereferenced through the
 * current_cpu_datap() macro. For speed, the %gs segment is based here, and
 * using this, inlines provide single-instruction access to frequently used
 * members - such as get_cpu_number()/cpu_number(), and get_active_thread()/
 * current_thread().
 *
 * Cpu data owned by another processor can be accessed using the
 * cpu_datap(cpu_number) macro which uses the cpu_data_ptr[] array of per-cpu
 * pointers.
 */
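/*
 * Illustrative sketch of the two access paths described above. The wrapper
 * function is hypothetical; the accessors it calls are the real ones
 * defined later in this file:
 *
 *	void example_percpu_access(void)	// hypothetical
 *	{
 *		// Fast, %gs-relative access to this CPU's own data:
 *		cpu_data_t *my_cdp = current_cpu_datap();
 *		int         mycpu  = get_cpu_number();
 *
 *		// Access to another CPU's data goes via the pointer array:
 *		cpu_data_t *other = cpu_datap(mycpu == 0 ? 1 : 0);
 *		if (other != NULL && other->cpu_running) {
 *			(void)other->cpu_interrupt_level;
 *		}
 *		(void)my_cdp;
 *	}
 */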
typedef struct {
	pcid_t                  cpu_pcid_free_hint;
#define PMAP_PCID_MAX_PCID      (0x800)
	pcid_ref_t              cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];
	pmap_t                  cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];
} pcid_cdata_t;

typedef struct cpu_data {
	struct pal_cpu_data     cpu_pal_data;           /* PAL-specific data */
#define cpu_pd cpu_pal_data                             /* convenience alias */
	struct cpu_data         *cpu_this;              /* pointer to myself */
	thread_t                cpu_active_thread;
	thread_t                cpu_nthread;
	volatile int            cpu_preemption_level;
	int                     cpu_number;             /* Logical CPU */
	void                    *cpu_int_state;         /* interrupt state */
	vm_offset_t             cpu_active_stack;       /* kernel stack base */
	vm_offset_t             cpu_kernel_stack;       /* kernel stack top */
	vm_offset_t             cpu_int_stack_top;
	int                     cpu_interrupt_level;
	volatile int            cpu_signals;            /* IPI events */
	volatile int            cpu_prior_signals;      /* Last set of events,
	                                                 * debugging
	                                                 */
	ast_t                   cpu_pending_ast;
	volatile int            cpu_running;
#if !MONOTONIC
	boolean_t               cpu_fixed_pmcs_enabled;
#endif /* !MONOTONIC */
	rtclock_timer_t         rtclock_timer;
	uint64_t                quantum_timer_deadline;
	volatile addr64_t       cpu_active_cr3 __attribute((aligned(64)));
	union {
		volatile uint32_t cpu_tlb_invalid;
		struct {
			volatile uint16_t cpu_tlb_invalid_local;
			volatile uint16_t cpu_tlb_invalid_global;
		};
	};
	uint64_t                cpu_ip_desc[2];
	volatile task_map_t     cpu_task_map;
	volatile addr64_t       cpu_task_cr3;
	addr64_t                cpu_kernel_cr3;
	volatile addr64_t       cpu_ucr3;
	volatile addr64_t       cpu_shadowtask_cr3;
	boolean_t               cpu_pagezero_mapped;
	cpu_uber_t              cpu_uber;
	/* Double-mapped per-CPU exception stack address */
	uintptr_t               cd_estack;
	int                     cpu_xstate;
	int                     cpu_curtask_has_ldt;
	int                     cpu_curthread_do_segchk;
	/* Address of shadowed, partially mirrored CPU data structures located
	 * in the double mapped PML4
	 */
	void                    *cd_shadow;
	union {
		volatile uint32_t cpu_tlb_invalid_count;
		struct {
			volatile uint16_t cpu_tlb_invalid_local_count;
			volatile uint16_t cpu_tlb_invalid_global_count;
		};
	};

	uint16_t                cpu_tlb_gen_counts_local[MAX_CPUS];
	uint16_t                cpu_tlb_gen_counts_global[MAX_CPUS];

	struct processor        *cpu_processor;
#if NCOPY_WINDOWS > 0
	struct cpu_pmap         *cpu_pmap;
#endif
	struct real_descriptor  *cpu_ldtp;
	struct cpu_desc_table   *cpu_desc_tablep;
	cpu_desc_index_t        cpu_desc_index;
	int                     cpu_ldt;
#if NCOPY_WINDOWS > 0
	vm_offset_t             cpu_copywindow_base;
	uint64_t                *cpu_copywindow_pdp;

	vm_offset_t             cpu_physwindow_base;
	uint64_t                *cpu_physwindow_ptep;
#endif

#define HWINTCNT_SIZE 256
	uint32_t                cpu_hwIntCnt[HWINTCNT_SIZE];    /* Interrupt counts */
	uint64_t                cpu_hwIntpexits[HWINTCNT_SIZE];
	uint64_t                cpu_dr7;                /* debug control register */
	uint64_t                cpu_int_event_time;     /* intr entry/exit time */
	pal_rtc_nanotime_t      *cpu_nanotime;          /* Nanotime info */
#if KPC
	/* double-buffered performance counter data */
	uint64_t                *cpu_kpc_buf[2];
	/* PMC shadow and reload value buffers */
	uint64_t                *cpu_kpc_shadow;
	uint64_t                *cpu_kpc_reload;
#endif
#if MONOTONIC
	struct mt_cpu           cpu_monotonic;
#endif /* MONOTONIC */
	uint32_t                cpu_pmap_pcid_enabled;
	pcid_t                  cpu_active_pcid;
	pcid_t                  cpu_last_pcid;
	pcid_t                  cpu_kernel_pcid;
	volatile pcid_ref_t     *cpu_pmap_pcid_coherentp;
	volatile pcid_ref_t     *cpu_pmap_pcid_coherentp_kernel;
	pcid_cdata_t            *cpu_pcid_data;
#ifdef PCID_STATS
	uint64_t                cpu_pmap_pcid_flushes;
	uint64_t                cpu_pmap_pcid_preserves;
#endif
	uint64_t                cpu_aperf;
	uint64_t                cpu_mperf;
	uint64_t                cpu_c3res;
	uint64_t                cpu_c6res;
	uint64_t                cpu_c7res;
	uint64_t                cpu_itime_total;
	uint64_t                cpu_rtime_total;
	uint64_t                cpu_ixtime;
	uint64_t                cpu_idle_exits;
	uint64_t                cpu_rtimes[CPU_RTIME_BINS];
	uint64_t                cpu_itimes[CPU_ITIME_BINS];
#if !MONOTONIC
	uint64_t                cpu_cur_insns;
	uint64_t                cpu_cur_ucc;
	uint64_t                cpu_cur_urc;
#endif /* !MONOTONIC */
	uint64_t                cpu_gpmcs[4];
	uint64_t                cpu_max_observed_int_latency;
	int                     cpu_max_observed_int_latency_vector;
	volatile boolean_t      cpu_NMI_acknowledged;
	uint64_t                debugger_entry_time;
	uint64_t                debugger_ipi_time;
	/* A separate nested interrupt stack flag, to account
	 * for non-nested interrupts arriving while on the interrupt stack
	 * Currently only occurs when AICPM enables interrupts on the
	 * interrupt stack during processor offlining.
	 */
	uint32_t                cpu_nested_istack;
	uint32_t                cpu_nested_istack_events;
	x86_saved_state64_t     *cpu_fatal_trap_state;
	x86_saved_state64_t     *cpu_post_fatal_trap_state;
#if CONFIG_VMX
	vmx_cpu_t               cpu_vmx;                /* wonderful world of virtualization */
#endif
#if CONFIG_MCA
	struct mca_state        *cpu_mca_state;         /* State at MC fault */
#endif
	int                     cpu_type;
	int                     cpu_subtype;
	int                     cpu_threadtype;
	boolean_t               cpu_iflag;
	boolean_t               cpu_boot_complete;
	int                     cpu_hibernate;
#define MAX_PREEMPTION_RECORDS (8)
#if DEVELOPMENT || DEBUG
	int                     cpu_plri;
	plrecord_t              plrecords[MAX_PREEMPTION_RECORDS];
#endif
	void                    *cpu_console_buf;
	struct x86_lcpu         lcpu;
	int                     cpu_phys_number;        /* Physical CPU */
	cpu_id_t                cpu_id;                 /* Platform Expert */
#if DEBUG
	uint64_t                cpu_entry_cr3;
	uint64_t                cpu_exit_cr3;
	uint64_t                cpu_pcid_last_cr3;
#endif
	boolean_t               cpu_rendezvous_in_progress;
} cpu_data_t;

extern cpu_data_t *cpu_data_ptr[];

/* Macro to generate inline bodies to retrieve per-cpu data fields. */
#if defined(__clang__)
#define GS_RELATIVE volatile __attribute__((address_space(256)))
#ifndef offsetof
#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE,MEMBER)
#endif

#define CPU_DATA_GET(member, type)                                      \
	cpu_data_t GS_RELATIVE *cpu_data =                              \
	        (cpu_data_t GS_RELATIVE *)0UL;                          \
	type ret;                                                       \
	ret = cpu_data->member;                                         \
	return ret;

#define CPU_DATA_GET_INDEX(member, index, type)                         \
	cpu_data_t GS_RELATIVE *cpu_data =                              \
	        (cpu_data_t GS_RELATIVE *)0UL;                          \
	type ret;                                                       \
	ret = cpu_data->member[index];                                  \
	return ret;

#define CPU_DATA_SET(member, value)                                     \
	cpu_data_t GS_RELATIVE *cpu_data =                              \
	        (cpu_data_t GS_RELATIVE *)0UL;                          \
	cpu_data->member = value;

#define CPU_DATA_XCHG(member, value, type)                              \
	cpu_data_t GS_RELATIVE *cpu_data =                              \
	        (cpu_data_t GS_RELATIVE *)0UL;                          \
	type ret;                                                       \
	ret = cpu_data->member;                                         \
	cpu_data->member = value;                                       \
	return ret;

#else /* !defined(__clang__) */

#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif /* offsetof */
#define CPU_DATA_GET(member, type)                                      \
	type ret;                                                       \
	__asm__ volatile ("mov %%gs:%P1,%0"                             \
	        : "=r" (ret)                                            \
	        : "i" (offsetof(cpu_data_t, member)));                  \
	return ret;

#define CPU_DATA_GET_INDEX(member, index, type)                         \
	type ret;                                                       \
	__asm__ volatile ("mov %%gs:(%1),%0"                            \
	        : "=r" (ret)                                            \
	        : "r" (offsetof(cpu_data_t, member[index])));           \
	return ret;

#define CPU_DATA_SET(member, value)                                     \
	__asm__ volatile ("mov %0,%%gs:%P1"                             \
	        :                                                       \
	        : "r" (value), "i" (offsetof(cpu_data_t, member)));

#define CPU_DATA_XCHG(member, value, type)                              \
	type ret;                                                       \
	__asm__ volatile ("xchg %0,%%gs:%P1"                            \
	        : "=r" (ret)                                            \
	        : "i" (offsetof(cpu_data_t, member)), "0" (value));     \
	return ret;

#endif /* !defined(__clang__) */

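/*
 * Sketch of what the non-clang macros above generate, using a getter of
 * the same shape as get_cpu_number() (defined below) as the example. The
 * body is a single %gs-relative mov, which is the point of basing %gs at
 * the per-cpu area:
 *
 *	static inline int
 *	example_cpu_number(void)	// hypothetical name
 *	{
 *		int ret;
 *		__asm__ volatile ("mov %%gs:%P1,%0"
 *		        : "=r" (ret)
 *		        : "i" (offsetof(cpu_data_t, cpu_number)));
 *		return ret;
 *	}
 *
 * i.e., roughly "movl %gs:<offset of cpu_number>, %eax" at each call site.
 */
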
/*
 * Everyone within the osfmk part of the kernel can use the fast
 * inline versions of these routines. Everyone outside must call
 * the real thing.
 */


/*
 * The "volatile" flavor of current_thread() is intended for use by
 * scheduler code which may need to update the thread pointer in the
 * course of a context switch. Any call to current_thread() made
 * prior to the thread pointer update should be safe to optimize away
 * as it should be consistent with that thread's state to the extent
 * the compiler can reason about it. Likewise, the context switch
 * path will eventually result in an arbitrary branch to the new
 * thread's pc, about which the compiler won't be able to reason.
 * Thus any compile-time optimization of current_thread() calls made
 * within the new thread should be safely encapsulated in its
 * register/stack state. The volatile form therefore exists to cover
 * the window between the thread pointer update and the branch to
 * the new pc.
 */
static inline thread_t
get_active_thread_volatile(void)
{
	CPU_DATA_GET(cpu_active_thread, thread_t)
}

static inline __attribute__((const)) thread_t
get_active_thread(void)
{
	CPU_DATA_GET(cpu_active_thread, thread_t)
}

#define current_thread_fast()           get_active_thread()
#define current_thread_volatile()       get_active_thread_volatile()
#define current_thread()                current_thread_fast()

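/*
 * Sketch of the window the comment above describes, from a simplified
 * context-switch path (the direct store to cpu_active_thread is shown
 * only for illustration; xnu's actual switch code lives in assembly):
 *
 *	thread_t old = current_thread();             // may be cached by compiler
 *	current_cpu_datap()->cpu_active_thread = new_thread;
 *	thread_t cur = current_thread_volatile();    // forced reload, sees new_thread
 *
 * Between the store and the branch to the new thread's pc, only the
 * volatile form is guaranteed to observe the update, since
 * get_active_thread() is declared __attribute__((const)).
 */
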
#define cpu_mode_is64bit()              TRUE

static inline int
get_preemption_level(void)
{
	CPU_DATA_GET(cpu_preemption_level, int)
}
static inline int
get_interrupt_level(void)
{
	CPU_DATA_GET(cpu_interrupt_level, int)
}
static inline int
get_cpu_number(void)
{
	CPU_DATA_GET(cpu_number, int)
}
static inline int
get_cpu_phys_number(void)
{
	CPU_DATA_GET(cpu_phys_number, int)
}

static inline cpu_data_t *
current_cpu_datap(void)
{
	CPU_DATA_GET(cpu_this, cpu_data_t *);
}

/*
 * Facility to diagnose preemption-level imbalances, which are otherwise
 * challenging to debug. On each operation that enables or disables preemption,
 * we record a backtrace into a per-CPU ring buffer, along with the current
 * preemption level and operation type. Thus, if an imbalance is observed,
 * one can examine these per-CPU records to determine which codepath failed
 * to re-enable preemption, enabled preemption without a corresponding
 * disablement, etc. The backtracer determines which stack is currently active,
 * and uses that to perform bounds checks on unterminated stacks.
 * To enable, sysctl -w machdep.pltrace=1 on DEVELOPMENT or DEBUG kernels (DRK '15)
 * The bounds check currently doesn't account for non-default thread stack sizes.
 */
#if DEVELOPMENT || DEBUG
static inline void
rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata, uint64_t frameptr, bool use_cursp)
{
	extern uint32_t low_intstack[];         /* bottom */
	extern uint32_t low_eintstack[];        /* top */
	extern char     mp_slave_stack[];
	int             btidx = 0;

	uint64_t kstackb, kstackt;

	/* Obtain the 'current' program counter, initial backtrace
	 * element. This will also indicate if we were unable to
	 * trace further up the stack for some reason
	 */
	if (use_cursp) {
		__asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
                    : "=m" (rets[btidx++])
                    :
                    : "rax");
	}

	thread_t cplthread = cdata->cpu_active_thread;
	if (cplthread) {
		uintptr_t csp;
		if (use_cursp == true) {
			__asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
		} else {
			csp = frameptr;
		}
		/* Determine which stack we're on to populate stack bounds.
		 * We don't need to trace across stack boundaries for this
		 * routine.
		 */
		kstackb = cdata->cpu_active_stack;
		kstackt = kstackb + KERNEL_STACK_SIZE;
		if (csp < kstackb || csp > kstackt) {
			kstackt = cdata->cpu_kernel_stack;
			kstackb = kstackt - KERNEL_STACK_SIZE;
			if (csp < kstackb || csp > kstackt) {
				kstackt = cdata->cpu_int_stack_top;
				kstackb = kstackt - INTSTACK_SIZE;
				if (csp < kstackb || csp > kstackt) {
					kstackt = (uintptr_t)low_eintstack;
					kstackb = kstackt - INTSTACK_SIZE;
					if (csp < kstackb || csp > kstackt) {
						kstackb = (uintptr_t) mp_slave_stack;
						kstackt = kstackb + PAGE_SIZE;
					} else {
						kstackb = 0;
						kstackt = 0;
					}
				}
			}
		}

		if (__probable(kstackb && kstackt)) {
			uint64_t *cfp = (uint64_t *) frameptr;
			int rbbtf;

			for (rbbtf = btidx; rbbtf < maxframes; rbbtf++) {
				if (((uint64_t)cfp == 0) || (((uint64_t)cfp < kstackb) || ((uint64_t)cfp > kstackt))) {
					rets[rbbtf] = 0;
					continue;
				}
				rets[rbbtf] = *(cfp + 1);
				cfp = (uint64_t *) (*cfp);
			}
		}
	}
}

static inline void
pltrace_internal(boolean_t enable)
{
	cpu_data_t *cdata = current_cpu_datap();
	int cpli = cdata->cpu_preemption_level;
	int cplrecord = cdata->cpu_plri;
	uint64_t *plbts;

	assert(cpli >= 0);

	cdata->plrecords[cplrecord].pltype = enable;
	cdata->plrecords[cplrecord].plevel = cpli;

	plbts = &cdata->plrecords[cplrecord].plbt[0];

	cplrecord++;

	if (cplrecord >= MAX_PREEMPTION_RECORDS) {
		cplrecord = 0;
	}

	cdata->cpu_plri = cplrecord;

	rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
}

extern int plctrace_enabled;

static inline void
iotrace(iotrace_type_e type, uint64_t vaddr, uint64_t paddr, int size, uint64_t val,
    uint64_t sabs, uint64_t duration)
{
	cpu_data_t *cdata;
	int cpu_num, nextidx;
	iotrace_entry_t *cur_iotrace_ring;

	if (__improbable(mmiotrace_enabled == 0 || iotrace_generators == 0)) {
		return;
	}

	cdata = current_cpu_datap();
	cpu_num = cdata->cpu_number;
	nextidx = iotrace_next[cpu_num];
	cur_iotrace_ring = iotrace_ring[cpu_num];

	cur_iotrace_ring[nextidx].iotype = type;
	cur_iotrace_ring[nextidx].vaddr = vaddr;
	cur_iotrace_ring[nextidx].paddr = paddr;
	cur_iotrace_ring[nextidx].size = size;
	cur_iotrace_ring[nextidx].val = val;
	cur_iotrace_ring[nextidx].start_time_abs = sabs;
	cur_iotrace_ring[nextidx].duration = duration;

	iotrace_next[cpu_num] = ((nextidx + 1) >= iotrace_entries_per_cpu) ? 0 : (nextidx + 1);

	rbtrace_bt(&cur_iotrace_ring[nextidx].backtrace[0],
	    MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
}

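/*
 * Sketch of an iotrace() call site, assuming a hypothetical physical-read
 * primitive (xnu's real callers are the physical and port I/O access
 * routines elsewhere in osfmk):
 *
 *	uint64_t sabs = mach_absolute_time();
 *	uint64_t val  = example_phys_read_64(paddr);	// hypothetical
 *	iotrace(IOTRACE_PHYS_READ, vaddr, paddr, (int)sizeof(uint64_t), val,
 *	    sabs, mach_absolute_time() - sabs);
 */
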
static inline uint32_t
traptrace_start(int vecnum, uint64_t ipc, uint64_t sabs, uint64_t frameptr)
{
	cpu_data_t *cdata;
	int cpu_num, nextidx;
	traptrace_entry_t *cur_traptrace_ring;

	if (__improbable(traptrace_enabled == 0 || traptrace_generators == 0)) {
		return TRAPTRACE_INVALID_INDEX;
	}

	assert(ml_get_interrupts_enabled() == FALSE);
	cdata = current_cpu_datap();
	cpu_num = cdata->cpu_number;
	nextidx = traptrace_next[cpu_num];
	/* prevent nested interrupts from clobbering this record */
	traptrace_next[cpu_num] = ((nextidx + 1) >= traptrace_entries_per_cpu) ? 0 : (nextidx + 1);

	cur_traptrace_ring = traptrace_ring[cpu_num];

	cur_traptrace_ring[nextidx].vector = vecnum;
	cur_traptrace_ring[nextidx].curthread = current_thread();
	cur_traptrace_ring[nextidx].interrupted_pc = ipc;
	cur_traptrace_ring[nextidx].curpl = cdata->cpu_preemption_level;
	cur_traptrace_ring[nextidx].curil = cdata->cpu_interrupt_level;
	cur_traptrace_ring[nextidx].start_time_abs = sabs;
	cur_traptrace_ring[nextidx].duration = ~0ULL;

	rbtrace_bt(&cur_traptrace_ring[nextidx].backtrace[0],
	    MAX_TRACE_BTFRAMES - 1, cdata, frameptr, false);

	assert(nextidx <= 0xFFFF);

	return ((unsigned)cpu_num << 16) | nextidx;
}

static inline void
traptrace_end(uint32_t index, uint64_t eabs)
{
	if (index != TRAPTRACE_INVALID_INDEX) {
		traptrace_entry_t *ttentp = &traptrace_ring[index >> 16][index & 0xFFFF];

		ttentp->duration = eabs - ttentp->start_time_abs;
	}
}

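/*
 * Sketch of how a trap/interrupt path brackets itself with the helpers
 * above (the handler body is hypothetical; note that traptrace_start()
 * asserts interrupts are disabled):
 *
 *	uint32_t ttidx = traptrace_start(vecnum, state->isf.rip,
 *	    mach_absolute_time(), state->rbp);
 *	example_handle_trap(state);		// hypothetical
 *	traptrace_end(ttidx, mach_absolute_time());
 *
 * traptrace_end() is a no-op when tracing was off at entry, since
 * traptrace_start() then returned TRAPTRACE_INVALID_INDEX.
 */
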
#endif /* DEVELOPMENT || DEBUG */

static inline void
pltrace(boolean_t plenable)
{
#if DEVELOPMENT || DEBUG
	if (__improbable(plctrace_enabled != 0)) {
		pltrace_internal(plenable);
	}
#else
	(void)plenable;
#endif
}

static inline void
disable_preemption_internal(void)
{
	assert(get_preemption_level() >= 0);

	os_compiler_barrier(release);
#if defined(__clang__)
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	cpu_data->cpu_preemption_level++;
#else
	__asm__ volatile ("incl %%gs:%P0"
            :
            : "i" (offsetof(cpu_data_t, cpu_preemption_level)));
#endif
	os_compiler_barrier(acquire);
	pltrace(FALSE);
}

static inline void
enable_preemption_internal(void)
{
	assert(get_preemption_level() > 0);
	pltrace(TRUE);
	os_compiler_barrier(release);
#if defined(__clang__)
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	if (0 == --cpu_data->cpu_preemption_level) {
		kernel_preempt_check();
	}
#else
	__asm__ volatile ("decl %%gs:%P0		\n\t"
            "jne 1f				\n\t"
            "call _kernel_preempt_check	\n\t"
            "1:"
            : /* no outputs */
            : "i" (offsetof(cpu_data_t, cpu_preemption_level))
            : "eax", "ecx", "edx", "cc", "memory");
#endif
	os_compiler_barrier(acquire);
}

static inline void
enable_preemption_no_check(void)
{
	assert(get_preemption_level() > 0);

	pltrace(TRUE);
	os_compiler_barrier(release);
#if defined(__clang__)
	cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL;
	cpu_data->cpu_preemption_level--;
#else
	__asm__ volatile ("decl %%gs:%P0"
            : /* no outputs */
            : "i" (offsetof(cpu_data_t, cpu_preemption_level))
            : "cc", "memory");
#endif
	os_compiler_barrier(acquire);
}

static inline void
_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}

static inline void
mp_disable_preemption(void)
{
	disable_preemption_internal();
}

static inline void
_mp_disable_preemption(void)
{
	disable_preemption_internal();
}

static inline void
mp_enable_preemption(void)
{
	enable_preemption_internal();
}

static inline void
_mp_enable_preemption(void)
{
	enable_preemption_internal();
}

static inline void
mp_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}

static inline void
_mp_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}

#ifdef XNU_KERNEL_PRIVATE
#define disable_preemption() disable_preemption_internal()
#define enable_preemption() enable_preemption_internal()
#define MACHINE_PREEMPTION_MACROS (1)
#endif

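/*
 * Sketch of the intended pairing, using the XNU_KERNEL_PRIVATE macros
 * above (the critical-section body is hypothetical). Per-cpu data is only
 * stably "ours" while preemption is disabled, so accesses through
 * current_cpu_datap() are bracketed like this:
 *
 *	disable_preemption();
 *	current_cpu_datap()->cpu_hwIntCnt[vector]++;	// example field access
 *	enable_preemption();	// at level 0, this calls kernel_preempt_check()
 */
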
static inline cpu_data_t *
cpu_datap(int cpu)
{
	return cpu_data_ptr[cpu];
}

static inline int
cpu_is_running(int cpu)
{
	return (cpu_datap(cpu) != NULL) && (cpu_datap(cpu)->cpu_running);
}

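/*
 * Sketch of a walk over all CPU slots using the accessors above;
 * real_ncpus is assumed here to be the global CPU count xnu exposes
 * elsewhere in osfmk:
 *
 *	for (int cpu = 0; cpu < (int)real_ncpus; cpu++) {
 *		if (!cpu_is_running(cpu)) {
 *			continue;	// slot may be NULL or CPU offline
 *		}
 *		(void)cpu_datap(cpu)->cpu_number;
 *	}
 */
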
#ifdef MACH_KERNEL_PRIVATE
static inline cpu_data_t *
cpu_shadowp(int cpu)
{
	return cpu_data_ptr[cpu]->cd_shadow;
}

#endif
extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu);
extern void cpu_data_realloc(void);

#endif  /* I386_CPU_DATA */