/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/kalloc.h>
#include <mach/mach_time.h>
#include <i386/cpu_data.h>
#include <i386/cpuid.h>
#include <i386/cpu_topology.h>
#include <i386/cpu_threads.h>
#include <i386/machine_cpu.h>
#include <i386/machine_check.h>
#include <i386/proc_reg.h>

#define IF(bool,str)	((bool) ? (str) : "")

static boolean_t	mca_initialized = FALSE;
static boolean_t	mca_MCE_present = FALSE;
static boolean_t	mca_MCA_present = FALSE;
static uint32_t		mca_family = 0;
static unsigned int	mca_error_bank_count = 0;
static boolean_t	mca_control_MSR_present = FALSE;
static boolean_t	mca_threshold_status_present = FALSE;
static boolean_t	mca_extended_MSRs_present = FALSE;
static unsigned int	mca_extended_MSRs_count = 0;
static ia32_mcg_cap_t	ia32_mcg_cap;
decl_simple_lock_data(static, mca_lock);

typedef struct {
	ia32_mci_ctl_t		mca_mci_ctl;
	ia32_mci_status_t	mca_mci_status;
	ia32_mci_misc_t		mca_mci_misc;
	ia32_mci_addr_t		mca_mci_addr;
} mca_mci_bank_t;

typedef struct mca_state {
	ia32_mcg_ctl_t		mca_mcg_ctl;
	ia32_mcg_status_t	mca_mcg_status;
	mca_mci_bank_t		mca_error_bank[0];
} mca_state_t;

typedef enum {
	CLEAR,
	DUMPING,
	DUMPED
} mca_dump_state_t;
static volatile mca_dump_state_t mca_dump_state = CLEAR;

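/*
 * Probe the processor's machine-check support.  The CPUID feature flags
 * indicate whether MCE and MCA are available; if MCA is present, the
 * IA32_MCG_CAP MSR supplies the error-bank count and optional capabilities.
 */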
static void
mca_get_availability(void)
{
	uint64_t	features = cpuid_info()->cpuid_features;
	uint32_t	family   = cpuid_info()->cpuid_family;

	mca_MCE_present = (features & CPUID_FEATURE_MCE) != 0;
	mca_MCA_present = (features & CPUID_FEATURE_MCA) != 0;
	mca_family = family;

	/*
	 * If MCA is present, the number of banks etc. is reported by the
	 * IA32_MCG_CAP MSR.
	 */
	if (mca_MCA_present) {
		ia32_mcg_cap.u64 = rdmsr64(IA32_MCG_CAP);
		mca_error_bank_count = ia32_mcg_cap.bits.count;
		mca_control_MSR_present = ia32_mcg_cap.bits.mcg_ctl_p;
		mca_threshold_status_present = ia32_mcg_cap.bits.mcg_tes_p;
		if (family == 0x0F) {
			mca_extended_MSRs_present = ia32_mcg_cap.bits.mcg_ext_p;
			mca_extended_MSRs_count = ia32_mcg_cap.bits.mcg_ext_cnt;
		}
	}
}

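/*
 * Per-cpu initialization: the boot processor discovers the machine-check
 * architecture; every processor then enables the available MCA features
 * and error-reporting banks, clears stale error status, and sets CR4.MCE
 * so machine-check exceptions are delivered.
 */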
void
mca_cpu_init(void)
{
	unsigned int	i;

	/*
	 * The first (boot) processor is responsible for discovering the
	 * machine check architecture present on this machine.
	 */
	if (!mca_initialized) {
		mca_get_availability();
		mca_initialized = TRUE;
		simple_lock_init(&mca_lock, 0);
	}

	if (mca_MCA_present) {

		/* Enable all MCA features */
		if (mca_control_MSR_present)
			wrmsr64(IA32_MCG_CTL, IA32_MCG_CTL_ENABLE);

		switch (mca_family) {
		case 0x06:
			/* Enable all but mc0 */
			for (i = 1; i < mca_error_bank_count; i++)
				wrmsr64(IA32_MCi_CTL(i), 0xFFFFFFFFFFFFFFFFULL);

			/* Clear all errors */
			for (i = 0; i < mca_error_bank_count; i++)
				wrmsr64(IA32_MCi_STATUS(i), 0ULL);
			break;
		case 0x0F:
			/* Enable all banks */
			for (i = 0; i < mca_error_bank_count; i++)
				wrmsr64(IA32_MCi_CTL(i), 0xFFFFFFFFFFFFFFFFULL);

			/* Clear all errors */
			for (i = 0; i < mca_error_bank_count; i++)
				wrmsr64(IA32_MCi_STATUS(i), 0ULL);
			break;
		}
	}

	/* Enable machine check exception handling if available */
	if (mca_MCE_present) {
		set_cr4(get_cr4() | CR4_MCE);
	}
}

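/*
 * Allocate the per-cpu MCA save area, sized for the detected bank count.
 * The boot processor's area is also allocated here on its behalf if it
 * does not yet exist (its own early init may run before allocation is
 * possible).
 */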
void
mca_cpu_alloc(cpu_data_t *cdp)
{
	vm_size_t	mca_state_size;

	/*
	 * Allocate space for an array of error banks.
	 */
	mca_state_size = sizeof(mca_state_t) +
				sizeof(mca_mci_bank_t) * mca_error_bank_count;
	cdp->cpu_mca_state = kalloc(mca_state_size);
	if (cdp->cpu_mca_state == NULL) {
		printf("mca_cpu_alloc() failed for cpu %d\n", cdp->cpu_number);
		return;
	}
	bzero((void *) cdp->cpu_mca_state, mca_state_size);

	/*
	 * If the boot processor has yet to have its allocation made,
	 * do this now.
	 */
	if (cpu_datap(master_cpu)->cpu_mca_state == NULL)
		mca_cpu_alloc(cpu_datap(master_cpu));
}

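/*
 * Snapshot the global machine-check registers and every error bank
 * (CTL, STATUS and, when the status valid bits allow, MISC and ADDR)
 * into the given per-cpu save area.  Must run with interrupts disabled
 * or preemption held, as asserted below.
 */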
static void
mca_save_state(mca_state_t *mca_state)
{
	mca_mci_bank_t	*bank;
	unsigned int	i;

	assert(!ml_get_interrupts_enabled() || get_preemption_level() > 0);

	if (mca_state == NULL)
		return;

	mca_state->mca_mcg_ctl = mca_control_MSR_present ?
					rdmsr64(IA32_MCG_CTL) : 0ULL;
	mca_state->mca_mcg_status.u64 = rdmsr64(IA32_MCG_STATUS);

	bank = (mca_mci_bank_t *) &mca_state->mca_error_bank[0];
	for (i = 0; i < mca_error_bank_count; i++, bank++) {
		bank->mca_mci_ctl        = rdmsr64(IA32_MCi_CTL(i));
		bank->mca_mci_status.u64 = rdmsr64(IA32_MCi_STATUS(i));
		if (!bank->mca_mci_status.bits.val)
			continue;
		bank->mca_mci_misc = (bank->mca_mci_status.bits.miscv) ?
					rdmsr64(IA32_MCi_MISC(i)) : 0ULL;
		bank->mca_mci_addr = (bank->mca_mci_status.bits.addrv) ?
					rdmsr64(IA32_MCi_ADDR(i)) : 0ULL;
	}
}

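/*
 * If a dump has been started (mca_dump_state has advanced past CLEAR),
 * capture this cpu's MCA state into its per-cpu save area.
 */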
void
mca_check_save(void)
{
	if (mca_dump_state > CLEAR)
		mca_save_state(current_cpu_datap()->cpu_mca_state);
}

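/*
 * Dump the extended machine-check state MSRs (IA32_MCG_R*).  These are
 * only reported as present here for family 0x0F processors and are
 * dumped in this form when the cpu is running in 64-bit mode.
 */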
static void mca_dump_64bit_state(void)
{
	kdb_printf("Extended Machine Check State:\n");
	kdb_printf(" IA32_MCG_RAX: 0x%016qx\n", rdmsr64(IA32_MCG_RAX));
	kdb_printf(" IA32_MCG_RBX: 0x%016qx\n", rdmsr64(IA32_MCG_RBX));
	kdb_printf(" IA32_MCG_RCX: 0x%016qx\n", rdmsr64(IA32_MCG_RCX));
	kdb_printf(" IA32_MCG_RDX: 0x%016qx\n", rdmsr64(IA32_MCG_RDX));
	kdb_printf(" IA32_MCG_RSI: 0x%016qx\n", rdmsr64(IA32_MCG_RSI));
	kdb_printf(" IA32_MCG_RDI: 0x%016qx\n", rdmsr64(IA32_MCG_RDI));
	kdb_printf(" IA32_MCG_RBP: 0x%016qx\n", rdmsr64(IA32_MCG_RBP));
	kdb_printf(" IA32_MCG_RSP: 0x%016qx\n", rdmsr64(IA32_MCG_RSP));
	kdb_printf(" IA32_MCG_RFLAGS: 0x%016qx\n", rdmsr64(IA32_MCG_RFLAGS));
	kdb_printf(" IA32_MCG_RIP: 0x%016qx\n", rdmsr64(IA32_MCG_RIP));
	kdb_printf(" IA32_MCG_MISC: 0x%016qx\n", rdmsr64(IA32_MCG_MISC));
	kdb_printf(" IA32_MCG_R8: 0x%016qx\n", rdmsr64(IA32_MCG_R8));
	kdb_printf(" IA32_MCG_R9: 0x%016qx\n", rdmsr64(IA32_MCG_R9));
	kdb_printf(" IA32_MCG_R10: 0x%016qx\n", rdmsr64(IA32_MCG_R10));
	kdb_printf(" IA32_MCG_R11: 0x%016qx\n", rdmsr64(IA32_MCG_R11));
	kdb_printf(" IA32_MCG_R12: 0x%016qx\n", rdmsr64(IA32_MCG_R12));
	kdb_printf(" IA32_MCG_R13: 0x%016qx\n", rdmsr64(IA32_MCG_R13));
	kdb_printf(" IA32_MCG_R14: 0x%016qx\n", rdmsr64(IA32_MCG_R14));
	kdb_printf(" IA32_MCG_R15: 0x%016qx\n", rdmsr64(IA32_MCG_R15));
}

static uint32_t rdmsr32(uint32_t msr)
{
	return (uint32_t) rdmsr64(msr);
}

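/*
 * Counterpart of mca_dump_64bit_state() for 32-bit mode: the saved
 * register images are read via rdmsr32(), keeping only the low 32 bits.
 */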
static void mca_dump_32bit_state(void)
{
	kdb_printf("Extended Machine Check State:\n");
	kdb_printf(" IA32_MCG_EAX: 0x%08x\n", rdmsr32(IA32_MCG_EAX));
	kdb_printf(" IA32_MCG_EBX: 0x%08x\n", rdmsr32(IA32_MCG_EBX));
	kdb_printf(" IA32_MCG_ECX: 0x%08x\n", rdmsr32(IA32_MCG_ECX));
	kdb_printf(" IA32_MCG_EDX: 0x%08x\n", rdmsr32(IA32_MCG_EDX));
	kdb_printf(" IA32_MCG_ESI: 0x%08x\n", rdmsr32(IA32_MCG_ESI));
	kdb_printf(" IA32_MCG_EDI: 0x%08x\n", rdmsr32(IA32_MCG_EDI));
	kdb_printf(" IA32_MCG_EBP: 0x%08x\n", rdmsr32(IA32_MCG_EBP));
	kdb_printf(" IA32_MCG_ESP: 0x%08x\n", rdmsr32(IA32_MCG_ESP));
	kdb_printf(" IA32_MCG_EFLAGS: 0x%08x\n", rdmsr32(IA32_MCG_EFLAGS));
	kdb_printf(" IA32_MCG_EIP: 0x%08x\n", rdmsr32(IA32_MCG_EIP));
	kdb_printf(" IA32_MCG_MISC: 0x%08x\n", rdmsr32(IA32_MCG_MISC));
}

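/*
 * Print the processor signature (family/model/stepping), microcode
 * revision, and brand string for this cpu.
 */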
static void
mca_report_cpu_info(void)
{
	uint64_t	microcode;
	i386_cpu_info_t	*infop = cpuid_info();

	// microcode revision is top 32 bits of MSR_IA32_UCODE_REV
	microcode = rdmsr64(MSR_IA32_UCODE_REV) >> 32;
	kdb_printf(" family: %d model: %d stepping: %d microcode: %d\n",
		infop->cpuid_family,
		infop->cpuid_model,
		infop->cpuid_stepping,
		(uint32_t) microcode);
	kdb_printf(" %s\n", infop->cpuid_brand_string);
}

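/*
 * Human-readable names for the threshold-based error status values
 * reported in IA32_MCi_STATUS when that feature (MCG_TES_P) is present.
 */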
static const char *mca_threshold_status[] = {
	[THRESHOLD_STATUS_NO_TRACKING]	= "No tracking",
	[THRESHOLD_STATUS_GREEN]	= "Green",
	[THRESHOLD_STATUS_YELLOW]	= "Yellow",
	[THRESHOLD_STATUS_RESERVED]	= "Reserved"
};

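/*
 * Decode and print one error-reporting bank from the saved state:
 * the status word, error codes, status flag bits, and the ADDR/MISC
 * registers when their valid bits are set.
 */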
static void
mca_dump_bank(mca_state_t *state, int i)
{
	mca_mci_bank_t		*bank;
	ia32_mci_status_t	status;

	bank = &state->mca_error_bank[i];
	status = bank->mca_mci_status;
	kdb_printf(
		" IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n",
		i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in"));
	if (!status.bits.val)
		return;

	kdb_printf(
		" MCA error code: 0x%04x\n",
		status.bits.mca_error);
	kdb_printf(
		" Model specific error code: 0x%04x\n",
		status.bits.model_specific_error);
	if (!mca_threshold_status_present) {
		kdb_printf(
			" Other information: 0x%08x\n",
			status.bits.other_information);
	} else {
		int	threshold = status.bits_tes_p.threshold;
		kdb_printf(
			" Other information: 0x%08x\n"
			" Threshold-based status: %s\n",
			status.bits_tes_p.other_information,
			(status.bits_tes_p.uc == 0) ?
				mca_threshold_status[threshold] :
				"Undefined");
	}
	kdb_printf(
		" Status bits:\n%s%s%s%s%s%s",
		IF(status.bits.pcc,   " Processor context corrupt\n"),
		IF(status.bits.addrv, " ADDR register valid\n"),
		IF(status.bits.miscv, " MISC register valid\n"),
		IF(status.bits.en,    " Error enabled\n"),
		IF(status.bits.uc,    " Uncorrected error\n"),
		IF(status.bits.over,  " Error overflow\n"));
	if (status.bits.addrv)
		kdb_printf(
			" IA32_MC%d_ADDR(0x%x): 0x%016qx\n",
			i, IA32_MCi_ADDR(i), bank->mca_mci_addr);
	if (status.bits.miscv)
		kdb_printf(
			" IA32_MC%d_MISC(0x%x): 0x%016qx\n",
			i, IA32_MCi_MISC(i), bank->mca_mci_misc);
}

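/* Walk every error-reporting bank in the saved state and print it. */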
static void
mca_dump_error_banks(mca_state_t *state)
{
	unsigned int	i;

	kdb_printf("MCA error-reporting registers:\n");
	for (i = 0; i < mca_error_bank_count; i++) {
		mca_dump_bank(state, i);
	}
}

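/*
 * Dump the machine-check architecture state, presumably from the
 * machine-check exception path: capabilities, cpu identification,
 * global status, each error bank and, if supported, the extended
 * state MSRs.  Only the first caller performs the dump; concurrent
 * callers spin until it completes.
 */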
void
mca_dump(void)
{
	ia32_mcg_status_t	status;
	mca_state_t		*mca_state = current_cpu_datap()->cpu_mca_state;

	/*
	 * Capture local MCA registers to per-cpu data.
	 */
	mca_save_state(mca_state);

	/*
	 * Serialize in case of multiple simultaneous machine-checks.
	 * Only the first caller is allowed to dump MCA registers;
	 * other threads spin in the meantime.
	 */
	simple_lock(&mca_lock);
	if (mca_dump_state > CLEAR) {
		simple_unlock(&mca_lock);
		while (mca_dump_state == DUMPING)
			cpu_pause();
		return;
	}
	mca_dump_state = DUMPING;
	simple_unlock(&mca_lock);

	/*
	 * Report machine-check capabilities:
	 */
	kdb_printf(
		"Machine-check capabilities (cpu %d) 0x%016qx:\n",
		cpu_number(), ia32_mcg_cap.u64);

	mca_report_cpu_info();

	kdb_printf(
		" %d error-reporting banks\n%s%s%s", mca_error_bank_count,
		IF(mca_control_MSR_present,
		   " control MSR present\n"),
		IF(mca_threshold_status_present,
		   " threshold-based error status present\n"),
		"");
	if (mca_extended_MSRs_present)
		kdb_printf(
			" %d extended MSRs present\n", mca_extended_MSRs_count);

	/*
	 * Report machine-check status:
	 */
	status.u64 = rdmsr64(IA32_MCG_STATUS);
	kdb_printf(
		"Machine-check status 0x%016qx:\n%s%s%s", status.u64,
		IF(status.bits.ripv, " restart IP valid\n"),
		IF(status.bits.eipv, " error IP valid\n"),
		IF(status.bits.mcip, " machine-check in progress\n"));

	/*
	 * Dump error-reporting registers:
	 */
	mca_dump_error_banks(mca_state);

	/*
	 * Dump any extended machine state:
	 */
	if (mca_extended_MSRs_present) {
		if (cpu_mode_is64bit())
			mca_dump_64bit_state();
		else
			mca_dump_32bit_state();
	}

	/* Update state to release any other threads. */
	mca_dump_state = DUMPED;
}