X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/6d2010ae8f7a6078e10b361c6962983bab233e0f..d9a64523371fa019c4575bb400cbbc3a50ac9903:/osfmk/i386/machine_check.c diff --git a/osfmk/i386/machine_check.c b/osfmk/i386/machine_check.c index 77681d340..ce8344659 100644 --- a/osfmk/i386/machine_check.c +++ b/osfmk/i386/machine_check.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,10 +32,22 @@ #include #include #include +#include #include #include #include +/* + * At the time of the machine-check exception, all hardware-threads panic. + * Each thread saves the state of its MCA registers to its per-cpu data area. + * + * State reporting is serialized so one thread dumps all valid state for all + * threads to the panic log. This may entail spinning waiting for other + * threads to complete saving state to memory. A timeout applies to this wait + * -- in particular, a 3-strikes timeout may prevent a thread from taking + * part in the affair. + */ + #define IF(bool,str) ((bool) ? 
(str) : "") static boolean_t mca_initialized = FALSE; @@ -44,10 +56,6 @@ static boolean_t mca_MCA_present = FALSE; static uint32_t mca_family = 0; static unsigned int mca_error_bank_count = 0; static boolean_t mca_control_MSR_present = FALSE; -static boolean_t mca_threshold_status_present = FALSE; -static boolean_t mca_sw_error_recovery_present = FALSE; -static boolean_t mca_extended_MSRs_present = FALSE; -static unsigned int mca_extended_MSRs_count = 0; static boolean_t mca_cmci_present = FALSE; static ia32_mcg_cap_t ia32_mcg_cap; decl_simple_lock_data(static, mca_lock); @@ -60,6 +68,8 @@ typedef struct { } mca_mci_bank_t; typedef struct mca_state { + boolean_t mca_is_saved; + boolean_t mca_is_valid; /* some state is valid */ ia32_mcg_ctl_t mca_mcg_ctl; ia32_mcg_status_t mca_mcg_status; mca_mci_bank_t mca_error_bank[0]; @@ -76,12 +86,19 @@ static void mca_get_availability(void) { uint64_t features = cpuid_info()->cpuid_features; - uint32_t family = cpuid_info()->cpuid_family; + uint32_t family = cpuid_info()->cpuid_family; + uint32_t model = cpuid_info()->cpuid_model; + uint32_t stepping = cpuid_info()->cpuid_stepping; + + if ((model == CPUID_MODEL_HASWELL && stepping < 3) || + (model == CPUID_MODEL_HASWELL_ULT && stepping < 1) || + (model == CPUID_MODEL_CRYSTALWELL && stepping < 1)) + panic("Haswell pre-C0 steppings are not supported"); mca_MCE_present = (features & CPUID_FEATURE_MCE) != 0; mca_MCA_present = (features & CPUID_FEATURE_MCA) != 0; mca_family = family; - + /* * If MCA, the number of banks etc is reported by the IA32_MCG_CAP MSR. 
*/ @@ -89,13 +106,7 @@ mca_get_availability(void) ia32_mcg_cap.u64 = rdmsr64(IA32_MCG_CAP); mca_error_bank_count = ia32_mcg_cap.bits.count; mca_control_MSR_present = ia32_mcg_cap.bits.mcg_ctl_p; - mca_threshold_status_present = ia32_mcg_cap.bits.mcg_tes_p; - mca_sw_error_recovery_present = ia32_mcg_cap.bits.mcg_ser_p; mca_cmci_present = ia32_mcg_cap.bits.mcg_ext_corr_err_p; - if (family == 0x0F) { - mca_extended_MSRs_present = ia32_mcg_cap.bits.mcg_ext_p; - mca_extended_MSRs_count = ia32_mcg_cap.bits.mcg_ext_cnt; - } } } @@ -206,6 +217,7 @@ mca_save_state(mca_state_t *mca_state) rdmsr64(IA32_MCi_MISC(i)) : 0ULL; bank->mca_mci_addr = (bank->mca_mci_status.bits.addrv)? rdmsr64(IA32_MCi_ADDR(i)) : 0ULL; + mca_state->mca_is_valid = TRUE; } /* @@ -214,6 +226,8 @@ mca_save_state(mca_state_t *mca_state) */ if (x86_package()->mca_state == NULL) x86_package()->mca_state = mca_state; + + mca_state->mca_is_saved = TRUE; } void @@ -223,146 +237,23 @@ mca_check_save(void) mca_save_state(current_cpu_datap()->cpu_mca_state); } -static void mca_dump_64bit_state(void) -{ - kdb_printf("Extended Machine Check State:\n"); - kdb_printf(" IA32_MCG_RAX: 0x%016qx\n", rdmsr64(IA32_MCG_RAX)); - kdb_printf(" IA32_MCG_RBX: 0x%016qx\n", rdmsr64(IA32_MCG_RBX)); - kdb_printf(" IA32_MCG_RCX: 0x%016qx\n", rdmsr64(IA32_MCG_RCX)); - kdb_printf(" IA32_MCG_RDX: 0x%016qx\n", rdmsr64(IA32_MCG_RDX)); - kdb_printf(" IA32_MCG_RSI: 0x%016qx\n", rdmsr64(IA32_MCG_RSI)); - kdb_printf(" IA32_MCG_RDI: 0x%016qx\n", rdmsr64(IA32_MCG_RDI)); - kdb_printf(" IA32_MCG_RBP: 0x%016qx\n", rdmsr64(IA32_MCG_RBP)); - kdb_printf(" IA32_MCG_RSP: 0x%016qx\n", rdmsr64(IA32_MCG_RSP)); - kdb_printf(" IA32_MCG_RFLAGS: 0x%016qx\n", rdmsr64(IA32_MCG_RFLAGS)); - kdb_printf(" IA32_MCG_RIP: 0x%016qx\n", rdmsr64(IA32_MCG_RIP)); - kdb_printf(" IA32_MCG_MISC: 0x%016qx\n", rdmsr64(IA32_MCG_MISC)); - kdb_printf(" IA32_MCG_R8: 0x%016qx\n", rdmsr64(IA32_MCG_R8)); - kdb_printf(" IA32_MCG_R9: 0x%016qx\n", rdmsr64(IA32_MCG_R9)); - kdb_printf(" 
IA32_MCG_R10: 0x%016qx\n", rdmsr64(IA32_MCG_R10)); - kdb_printf(" IA32_MCG_R11: 0x%016qx\n", rdmsr64(IA32_MCG_R11)); - kdb_printf(" IA32_MCG_R12: 0x%016qx\n", rdmsr64(IA32_MCG_R12)); - kdb_printf(" IA32_MCG_R13: 0x%016qx\n", rdmsr64(IA32_MCG_R13)); - kdb_printf(" IA32_MCG_R14: 0x%016qx\n", rdmsr64(IA32_MCG_R14)); - kdb_printf(" IA32_MCG_R15: 0x%016qx\n", rdmsr64(IA32_MCG_R15)); -} - -static uint32_t rdmsr32(uint32_t msr) -{ - return (uint32_t) rdmsr64(msr); -} - -static void mca_dump_32bit_state(void) -{ - kdb_printf("Extended Machine Check State:\n"); - kdb_printf(" IA32_MCG_EAX: 0x%08x\n", rdmsr32(IA32_MCG_EAX)); - kdb_printf(" IA32_MCG_EBX: 0x%08x\n", rdmsr32(IA32_MCG_EBX)); - kdb_printf(" IA32_MCG_ECX: 0x%08x\n", rdmsr32(IA32_MCG_ECX)); - kdb_printf(" IA32_MCG_EDX: 0x%08x\n", rdmsr32(IA32_MCG_EDX)); - kdb_printf(" IA32_MCG_ESI: 0x%08x\n", rdmsr32(IA32_MCG_ESI)); - kdb_printf(" IA32_MCG_EDI: 0x%08x\n", rdmsr32(IA32_MCG_EDI)); - kdb_printf(" IA32_MCG_EBP: 0x%08x\n", rdmsr32(IA32_MCG_EBP)); - kdb_printf(" IA32_MCG_ESP: 0x%08x\n", rdmsr32(IA32_MCG_ESP)); - kdb_printf(" IA32_MCG_EFLAGS: 0x%08x\n", rdmsr32(IA32_MCG_EFLAGS)); - kdb_printf(" IA32_MCG_EIP: 0x%08x\n", rdmsr32(IA32_MCG_EIP)); - kdb_printf(" IA32_MCG_MISC: 0x%08x\n", rdmsr32(IA32_MCG_MISC)); -} - static void mca_report_cpu_info(void) { i386_cpu_info_t *infop = cpuid_info(); - kdb_printf(" family: %d model: %d stepping: %d microcode: %d\n", + paniclog_append_noflush(" family: %d model: %d stepping: %d microcode: %d\n", infop->cpuid_family, infop->cpuid_model, infop->cpuid_stepping, infop->cpuid_microcode_version); - kdb_printf(" %s\n", infop->cpuid_brand_string); -} - -static const char *mc8_memory_operation[] = { - [MC8_MMM_GENERIC] = "generic", - [MC8_MMM_READ] = "read", - [MC8_MMM_WRITE] = "write", - [MC8_MMM_ADDRESS_COMMAND] = "address/command", - [MC8_MMM_RESERVED] = "reserved" -}; - -static void -mca_dump_bank_mc8(mca_state_t *state, int i) -{ - mca_mci_bank_t *bank; - ia32_mci_status_t status; - 
struct ia32_mc8_specific mc8; - int mmm; + paniclog_append_noflush(" signature: 0x%x\n", + infop->cpuid_signature); + paniclog_append_noflush(" %s\n", + infop->cpuid_brand_string); - bank = &state->mca_error_bank[i]; - status = bank->mca_mci_status; - mc8 = status.bits_mc8; - mmm = MIN(mc8.memory_operation, MC8_MMM_RESERVED); - - kdb_printf( - " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n", - i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in")); - if (!status.bits.val) - return; - - kdb_printf( - " Channel number: %d%s\n" - " Memory Operation: %s\n" - " Machine-specific error: %s%s%s%s%s%s%s%s%s\n" - " COR_ERR_CNT: %d\n", - mc8.channel_number, - IF(mc8.channel_number == 15, " (unknown)"), - mc8_memory_operation[mmm], - IF(mc8.read_ecc, "Read ECC "), - IF(mc8.ecc_on_a_scrub, "ECC on scrub "), - IF(mc8.write_parity, "Write parity "), - IF(mc8.redundant_memory, "Redundant memory "), - IF(mc8.sparing, "Sparing/Resilvering "), - IF(mc8.access_out_of_range, "Access out of Range "), - IF(mc8.rtid_out_of_range, "RTID out of Range "), - IF(mc8.address_parity, "Address Parity "), - IF(mc8.byte_enable_parity, "Byte Enable Parity "), - mc8.cor_err_cnt); - kdb_printf( - " Status bits:\n%s%s%s%s%s%s", - IF(status.bits.pcc, " Processor context corrupt\n"), - IF(status.bits.addrv, " ADDR register valid\n"), - IF(status.bits.miscv, " MISC register valid\n"), - IF(status.bits.en, " Error enabled\n"), - IF(status.bits.uc, " Uncorrected error\n"), - IF(status.bits.over, " Error overflow\n")); - if (status.bits.addrv) - kdb_printf( - " IA32_MC%d_ADDR(0x%x): 0x%016qx\n", - i, IA32_MCi_ADDR(i), bank->mca_mci_addr); - if (status.bits.miscv) { - ia32_mc8_misc_t mc8_misc; - - mc8_misc.u64 = bank->mca_mci_misc; - kdb_printf( - " IA32_MC%d_MISC(0x%x): 0x%016qx\n" - " RTID: %d\n" - " DIMM: %d\n" - " Channel: %d\n" - " Syndrome: 0x%x\n", - i, IA32_MCi_MISC(i), mc8_misc.u64, - mc8_misc.bits.rtid, - mc8_misc.bits.dimm, - mc8_misc.bits.channel, - (int) mc8_misc.bits.syndrome); - } } 
-static const char *mca_threshold_status[] = { - [THRESHOLD_STATUS_NO_TRACKING] = "No tracking", - [THRESHOLD_STATUS_GREEN] = "Green", - [THRESHOLD_STATUS_YELLOW] = "Yellow", - [THRESHOLD_STATUS_RESERVED] = "Reserved" -}; - static void mca_dump_bank(mca_state_t *state, int i) { @@ -371,95 +262,30 @@ mca_dump_bank(mca_state_t *state, int i) bank = &state->mca_error_bank[i]; status = bank->mca_mci_status; - kdb_printf( - " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n", - i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in")); if (!status.bits.val) return; - kdb_printf( - " MCA error code: 0x%04x\n", - status.bits.mca_error); - kdb_printf( - " Model specific error code: 0x%04x\n", - status.bits.model_specific_error); - if (!mca_threshold_status_present) { - kdb_printf( - " Other information: 0x%08x\n", - status.bits.other_information); - } else { - int threshold = status.bits_tes_p.threshold; - kdb_printf( - " Other information: 0x%08x\n" - " Threshold-based status: %s\n", - status.bits_tes_p.other_information, - (status.bits_tes_p.uc == 0) ? 
- mca_threshold_status[threshold] : - "Undefined"); - } - if (mca_threshold_status_present && - mca_sw_error_recovery_present) { - kdb_printf( - " Software Error Recovery:\n%s%s", - IF(status.bits_tes_p.ar, " Recovery action reqd\n"), - IF(status.bits_tes_p.s, " Signaling UCR error\n")); - } - kdb_printf( - " Status bits:\n%s%s%s%s%s%s", - IF(status.bits.pcc, " Processor context corrupt\n"), - IF(status.bits.addrv, " ADDR register valid\n"), - IF(status.bits.miscv, " MISC register valid\n"), - IF(status.bits.en, " Error enabled\n"), - IF(status.bits.uc, " Uncorrected error\n"), - IF(status.bits.over, " Error overflow\n")); + paniclog_append_noflush(" IA32_MC%d_STATUS(0x%x): 0x%016qx\n", + i, IA32_MCi_STATUS(i), status.u64); + if (status.bits.addrv) - kdb_printf( - " IA32_MC%d_ADDR(0x%x): 0x%016qx\n", + paniclog_append_noflush(" IA32_MC%d_ADDR(0x%x): 0x%016qx\n", i, IA32_MCi_ADDR(i), bank->mca_mci_addr); + if (status.bits.miscv) - kdb_printf( - " IA32_MC%d_MISC(0x%x): 0x%016qx\n", + paniclog_append_noflush(" IA32_MC%d_MISC(0x%x): 0x%016qx\n", i, IA32_MCi_MISC(i), bank->mca_mci_misc); } static void -mca_dump_error_banks(mca_state_t *state) +mca_cpu_dump_error_banks(mca_state_t *state) { unsigned int i; - kdb_printf("MCA error-reporting registers:\n"); + if (!state->mca_is_valid) + return; + for (i = 0; i < mca_error_bank_count; i++ ) { - if (i == 8) { - /* - * Fatal Memory Error - */ - - /* Dump MC8 for local package */ - kdb_printf(" Package %d logged:\n", - x86_package()->ppkg_num); - mca_dump_bank_mc8(state, 8); - - /* If there's other packages, report their MC8s */ - x86_pkg_t *pkg; - uint64_t deadline; - for (pkg = x86_pkgs; pkg != NULL; pkg = pkg->next) { - if (pkg == x86_package()) - continue; - deadline = mach_absolute_time() + LockTimeOut; - while (pkg->mca_state == NULL && - mach_absolute_time() < deadline) - cpu_pause(); - if (pkg->mca_state) { - kdb_printf(" Package %d logged:\n", - pkg->ppkg_num); - mca_dump_bank_mc8(pkg->mca_state, 8); - } else { - 
kdb_printf(" Package %d timed out!\n", - pkg->ppkg_num); - } - } - continue; - } mca_dump_bank(state, i); } } @@ -467,8 +293,9 @@ mca_dump_error_banks(mca_state_t *state) void mca_dump(void) { - ia32_mcg_status_t status; - mca_state_t *mca_state = current_cpu_datap()->cpu_mca_state; + mca_state_t *mca_state = current_cpu_datap()->cpu_mca_state; + uint64_t deadline; + unsigned int i = 0; /* * Capture local MCA registers to per-cpu data. @@ -476,8 +303,7 @@ mca_dump(void) mca_save_state(mca_state); /* - * Serialize in case of multiple simultaneous machine-checks. - * Only the first caller is allowed to dump MCA registers, + * Serialize: the first caller controls dumping MCA registers, * other threads spin meantime. */ simple_lock(&mca_lock); @@ -491,51 +317,56 @@ mca_dump(void) simple_unlock(&mca_lock); /* - * Report machine-check capabilities: + * Wait for all other hardware threads to save their state. + * Or timeout. */ - kdb_printf( - "Machine-check capabilities (cpu %d) 0x%016qx:\n", - cpu_number(), ia32_mcg_cap.u64); - - mca_report_cpu_info(); + deadline = mach_absolute_time() + LockTimeOut; + while (mach_absolute_time() < deadline && i < real_ncpus) { + if (!cpu_datap(i)->cpu_mca_state->mca_is_saved) { + cpu_pause(); + continue; + } + i += 1; + } - kdb_printf( - " %d error-reporting banks\n%s%s%s", mca_error_bank_count, - IF(mca_control_MSR_present, - " control MSR present\n"), - IF(mca_threshold_status_present, - " threshold-based error status present\n"), - IF(mca_cmci_present, - " extended corrected memory error handling present\n")); - if (mca_extended_MSRs_present) - kdb_printf( - " %d extended MSRs present\n", mca_extended_MSRs_count); - /* - * Report machine-check status: + * Report machine-check capabilities: */ - status.u64 = rdmsr64(IA32_MCG_STATUS); - kdb_printf( - "Machine-check status 0x%016qx:\n%s%s%s", status.u64, - IF(status.bits.ripv, " restart IP valid\n"), - IF(status.bits.eipv, " error IP valid\n"), - IF(status.bits.mcip, " machine-check in 
progress\n")); + paniclog_append_noflush("Machine-check capabilities: 0x%016qx\n", ia32_mcg_cap.u64); - /* - * Dump error-reporting registers: - */ - mca_dump_error_banks(mca_state); + mca_report_cpu_info(); + paniclog_append_noflush(" %d error-reporting banks\n", mca_error_bank_count); + /* - * Dump any extended machine state: + * Dump all processor state: */ - if (mca_extended_MSRs_present) { - if (cpu_mode_is64bit()) - mca_dump_64bit_state(); - else - mca_dump_32bit_state(); + for (i = 0; i < real_ncpus; i++) { + mca_state_t *mcsp = cpu_datap(i)->cpu_mca_state; + ia32_mcg_status_t status; + + if (mcsp == NULL || + mcsp->mca_is_saved == FALSE || + mcsp->mca_mcg_status.u64 == 0 || + !mcsp->mca_is_valid) { + continue; + } + status = mcsp->mca_mcg_status; + paniclog_append_noflush("Processor %d: IA32_MCG_STATUS: 0x%016qx\n", + i, status.u64); + mca_cpu_dump_error_banks(mcsp); } /* Update state to release any other threads. */ mca_dump_state = DUMPED; } + + +#if DEVELOPMENT || DEBUG +extern void mca_exception_panic(void); +extern void lapic_trigger_MC(void); +void mca_exception_panic(void) +{ + lapic_trigger_MC(); +} +#endif