|
|
|
@ -22,6 +22,10 @@ struct ghes_edac_pvt { |
|
|
|
|
struct list_head list; |
|
|
|
|
struct ghes *ghes; |
|
|
|
|
struct mem_ctl_info *mci; |
|
|
|
|
|
|
|
|
|
/* Buffers for the error handling routine */ |
|
|
|
|
char other_detail[160]; |
|
|
|
|
char msg[80]; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
static LIST_HEAD(ghes_reglist); |
|
|
|
@ -186,6 +190,7 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, |
|
|
|
|
struct edac_raw_error_desc *e; |
|
|
|
|
struct mem_ctl_info *mci; |
|
|
|
|
struct ghes_edac_pvt *pvt = NULL; |
|
|
|
|
char *p; |
|
|
|
|
|
|
|
|
|
list_for_each_entry(pvt, &ghes_reglist, list) { |
|
|
|
|
if (ghes == pvt->ghes) |
|
|
|
@ -201,15 +206,14 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, |
|
|
|
|
/* Cleans the error report buffer */ |
|
|
|
|
memset(e, 0, sizeof (*e)); |
|
|
|
|
e->error_count = 1; |
|
|
|
|
e->msg = "APEI"; |
|
|
|
|
strcpy(e->label, "unknown"); |
|
|
|
|
e->other_detail = ""; |
|
|
|
|
|
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { |
|
|
|
|
e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; |
|
|
|
|
e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; |
|
|
|
|
e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); |
|
|
|
|
} |
|
|
|
|
strcpy(e->label, "unknown label"); |
|
|
|
|
e->msg = pvt->msg; |
|
|
|
|
e->other_detail = pvt->other_detail; |
|
|
|
|
e->top_layer = -1; |
|
|
|
|
e->mid_layer = -1; |
|
|
|
|
e->low_layer = -1; |
|
|
|
|
*pvt->other_detail = '\0'; |
|
|
|
|
*pvt->msg = '\0'; |
|
|
|
|
|
|
|
|
|
switch (sev) { |
|
|
|
|
case GHES_SEV_CORRECTED: |
|
|
|
@ -226,12 +230,173 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, |
|
|
|
|
type = HW_EVENT_ERR_INFO; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
sprintf(e->location, |
|
|
|
|
"node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d", |
|
|
|
|
mem_err->node, mem_err->card, mem_err->module, |
|
|
|
|
mem_err->bank, mem_err->device, mem_err->row, mem_err->column, |
|
|
|
|
mem_err->bit_pos); |
|
|
|
|
edac_dbg(3, "error at location %s\n", e->location); |
|
|
|
|
edac_dbg(1, "error validation_bits: 0x%08llx\n", |
|
|
|
|
(long long)mem_err->validation_bits); |
|
|
|
|
|
|
|
|
|
/* Error type, mapped on e->msg */ |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { |
|
|
|
|
p = pvt->msg; |
|
|
|
|
switch (mem_err->error_type) { |
|
|
|
|
case 0: |
|
|
|
|
p += sprintf(p, "Unknown"); |
|
|
|
|
break; |
|
|
|
|
case 1: |
|
|
|
|
p += sprintf(p, "No error"); |
|
|
|
|
break; |
|
|
|
|
case 2: |
|
|
|
|
p += sprintf(p, "Single-bit ECC"); |
|
|
|
|
break; |
|
|
|
|
case 3: |
|
|
|
|
p += sprintf(p, "Multi-bit ECC"); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
p += sprintf(p, "Single-symbol ChipKill ECC"); |
|
|
|
|
break; |
|
|
|
|
case 5: |
|
|
|
|
p += sprintf(p, "Multi-symbol ChipKill ECC"); |
|
|
|
|
break; |
|
|
|
|
case 6: |
|
|
|
|
p += sprintf(p, "Master abort"); |
|
|
|
|
break; |
|
|
|
|
case 7: |
|
|
|
|
p += sprintf(p, "Target abort"); |
|
|
|
|
break; |
|
|
|
|
case 8: |
|
|
|
|
p += sprintf(p, "Parity Error"); |
|
|
|
|
break; |
|
|
|
|
case 9: |
|
|
|
|
p += sprintf(p, "Watchdog timeout"); |
|
|
|
|
break; |
|
|
|
|
case 10: |
|
|
|
|
p += sprintf(p, "Invalid address"); |
|
|
|
|
break; |
|
|
|
|
case 11: |
|
|
|
|
p += sprintf(p, "Mirror Broken"); |
|
|
|
|
break; |
|
|
|
|
case 12: |
|
|
|
|
p += sprintf(p, "Memory Sparing"); |
|
|
|
|
break; |
|
|
|
|
case 13: |
|
|
|
|
p += sprintf(p, "Scrub corrected error"); |
|
|
|
|
break; |
|
|
|
|
case 14: |
|
|
|
|
p += sprintf(p, "Scrub uncorrected error"); |
|
|
|
|
break; |
|
|
|
|
case 15: |
|
|
|
|
p += sprintf(p, "Physical Memory Map-out event"); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
p += sprintf(p, "reserved error (%d)", |
|
|
|
|
mem_err->error_type); |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
strcpy(pvt->msg, "unknown error"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Error address */ |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { |
|
|
|
|
e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; |
|
|
|
|
e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Error grain */ |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { |
|
|
|
|
e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Memory error location, mapped on e->location */ |
|
|
|
|
p = e->location; |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_NODE) |
|
|
|
|
p += sprintf(p, "node:%d ", mem_err->node); |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_CARD) |
|
|
|
|
p += sprintf(p, "card:%d ", mem_err->card); |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) |
|
|
|
|
p += sprintf(p, "module:%d ", mem_err->module); |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_BANK) |
|
|
|
|
p += sprintf(p, "bank:%d ", mem_err->bank); |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_ROW) |
|
|
|
|
p += sprintf(p, "row:%d ", mem_err->row); |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) |
|
|
|
|
p += sprintf(p, "col:%d ", mem_err->column); |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) |
|
|
|
|
p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); |
|
|
|
|
if (p > e->location) |
|
|
|
|
*(p - 1) = '\0'; |
|
|
|
|
|
|
|
|
|
/* All other fields are mapped on e->other_detail */ |
|
|
|
|
p = pvt->other_detail; |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { |
|
|
|
|
u64 status = mem_err->error_status; |
|
|
|
|
|
|
|
|
|
p += sprintf(p, "status(0x%016llx): ", (long long)status); |
|
|
|
|
switch ((status >> 8) & 0xff) { |
|
|
|
|
case 1: |
|
|
|
|
p += sprintf(p, "Error detected internal to the component "); |
|
|
|
|
break; |
|
|
|
|
case 16: |
|
|
|
|
p += sprintf(p, "Error detected in the bus "); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
p += sprintf(p, "Storage error in DRAM memory "); |
|
|
|
|
break; |
|
|
|
|
case 5: |
|
|
|
|
p += sprintf(p, "Storage error in TLB "); |
|
|
|
|
break; |
|
|
|
|
case 6: |
|
|
|
|
p += sprintf(p, "Storage error in cache "); |
|
|
|
|
break; |
|
|
|
|
case 7: |
|
|
|
|
p += sprintf(p, "Error in one or more functional units "); |
|
|
|
|
break; |
|
|
|
|
case 8: |
|
|
|
|
p += sprintf(p, "component failed self test "); |
|
|
|
|
break; |
|
|
|
|
case 9: |
|
|
|
|
p += sprintf(p, "Overflow or undervalue of internal queue "); |
|
|
|
|
break; |
|
|
|
|
case 17: |
|
|
|
|
p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); |
|
|
|
|
break; |
|
|
|
|
case 18: |
|
|
|
|
p += sprintf(p, "Improper access error "); |
|
|
|
|
break; |
|
|
|
|
case 19: |
|
|
|
|
p += sprintf(p, "Access to a memory address which is not mapped to any component "); |
|
|
|
|
break; |
|
|
|
|
case 20: |
|
|
|
|
p += sprintf(p, "Loss of Lockstep "); |
|
|
|
|
break; |
|
|
|
|
case 21: |
|
|
|
|
p += sprintf(p, "Response not associated with a request "); |
|
|
|
|
break; |
|
|
|
|
case 22: |
|
|
|
|
p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); |
|
|
|
|
break; |
|
|
|
|
case 23: |
|
|
|
|
p += sprintf(p, "Detection of a PATH_ERROR "); |
|
|
|
|
break; |
|
|
|
|
case 25: |
|
|
|
|
p += sprintf(p, "Bus operation timeout "); |
|
|
|
|
break; |
|
|
|
|
case 26: |
|
|
|
|
p += sprintf(p, "A read was issued to data that has been poisoned "); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
p += sprintf(p, "reserved "); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) |
|
|
|
|
p += sprintf(p, "requestorID: 0x%016llx ", |
|
|
|
|
(long long)mem_err->requestor_id); |
|
|
|
|
if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) |
|
|
|
|
p += sprintf(p, "responderID: 0x%016llx ", |
|
|
|
|
(long long)mem_err->responder_id); |
|
|
|
|
/*
 * Report the target ID from its own CPER field. The responder ID is
 * reported separately under CPER_MEM_VALID_RESPONDER_ID, so printing it
 * again here would mislabel it as the target.
 */
if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
	p += sprintf(p, "targetID: 0x%016llx ",
		     (long long)mem_err->target_id);
|
|
|
|
if (p > pvt->other_detail) |
|
|
|
|
*(p - 1) = '\0'; |
|
|
|
|
|
|
|
|
|
edac_raw_mc_handle_error(type, mci, e); |
|
|
|
|
} |
|
|
|
|