Diffstat (limited to 'meta-snowyowl/recipes-kernel/linux/files/0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch')
-rw-r--r--  meta-snowyowl/recipes-kernel/linux/files/0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch | 143
1 file changed, 143 insertions, 0 deletions
diff --git a/meta-snowyowl/recipes-kernel/linux/files/0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch b/meta-snowyowl/recipes-kernel/linux/files/0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch
new file mode 100644
index 00000000..396ea4a5
--- /dev/null
+++ b/meta-snowyowl/recipes-kernel/linux/files/0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch
@@ -0,0 +1,143 @@
+From 6fbcc132ec02586165b39b6a324becedc1da052f Mon Sep 17 00:00:00 2001
+From: Yazen Ghannam <yazen.ghannam@amd.com>
+Date: Tue, 13 Jun 2017 18:28:29 +0200
+Subject: [PATCH 08/10] x86/mce/AMD: Use saved threshold block info in
+ interrupt handler
+
+In the amd_threshold_interrupt() handler, we loop through every possible
+block in each bank and rediscover the block's address and whether it is
+valid, i.e. the valid bit is set, a counter is present and it is not locked.
+
+However, we already have the address saved in the threshold blocks list
+for each CPU and bank. The list only contains blocks that have passed
+all the valid checks.
+
+Besides the redundancy, there's also a smp_call_function* in
+get_block_address() which causes a warning when servicing the interrupt:
+
+ WARNING: CPU: 0 PID: 0 at kernel/smp.c:281 smp_call_function_single+0xdd/0xf0
+ ...
+ Call Trace:
+ <IRQ>
+ rdmsr_safe_on_cpu()
+ get_block_address.isra.2()
+ amd_threshold_interrupt()
+ smp_threshold_interrupt()
+ threshold_interrupt()
+
+because we do get called in an interrupt handler *with* interrupts
+disabled, which can result in a deadlock.
+
+Drop the redundant valid checks and move the overflow check, logging and
+block reset into a separate function.
+
+Check the first block, then iterate over the rest. This is needed
+because the first block is also used as the head of the list.
+
+Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: linux-edac <linux-edac@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20170613162835.30750-3-bp@alien8.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+---
+ arch/x86/kernel/cpu/mcheck/mce_amd.c | 66 +++++++++++++++++++-----------------
+ 1 file changed, 35 insertions(+), 31 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+index a4e38c4..188f95b 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+@@ -868,49 +868,53 @@ static void log_error_thresholding(unsigned int bank, u64 misc)
+ 	_log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
+ }
+
++static void log_and_reset_block(struct threshold_block *block)
++{
++	struct thresh_restart tr;
++	u32 low = 0, high = 0;
++
++	if (!block)
++		return;
++
++	if (rdmsr_safe(block->address, &low, &high))
++		return;
++
++	if (!(high & MASK_OVERFLOW_HI))
++		return;
++
++	/* Log the MCE which caused the threshold event. */
++	log_error_thresholding(block->bank, ((u64)high << 32) | low);
++
++	/* Reset threshold block after logging error. */
++	memset(&tr, 0, sizeof(tr));
++	tr.b = block;
++	threshold_restart_bank(&tr);
++}
++
+ /*
+  * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
+  * goes off when error_count reaches threshold_limit.
+  */
+ static void amd_threshold_interrupt(void)
+ {
+-	u32 low = 0, high = 0, address = 0;
+-	unsigned int bank, block, cpu = smp_processor_id();
+-	struct thresh_restart tr;
++	struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
++	unsigned int bank, cpu = smp_processor_id();
+
+ 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
+ 			continue;
+-		for (block = 0; block < NR_BLOCKS; ++block) {
+-			address = get_block_address(cpu, address, low, high, bank, block);
+-			if (!address)
+-				break;
+-
+-			if (rdmsr_safe(address, &low, &high))
+-				break;
+-
+-			if (!(high & MASK_VALID_HI)) {
+-				if (block)
+-					continue;
+-				else
+-					break;
+-			}
+-
+-			if (!(high & MASK_CNTP_HI) ||
+-			     (high & MASK_LOCKED_HI))
+-				continue;
+-
+-			if (!(high & MASK_OVERFLOW_HI))
+-				continue;
+
+-			/* Log the MCE which caused the threshold event. */
+-			log_error_thresholding(bank, ((u64)high << 32) | low);
++		first_block = per_cpu(threshold_banks, cpu)[bank]->blocks;
++		if (!first_block)
++			continue;
+
+-			/* Reset threshold block after logging error. */
+-			memset(&tr, 0, sizeof(tr));
+-			tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
+-			threshold_restart_bank(&tr);
+-		}
++		/*
++		 * The first block is also the head of the list. Check it first
++		 * before iterating over the rest.
++		 */
++		log_and_reset_block(first_block);
++		list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
++			log_and_reset_block(block);
+ 	}
+ }
+
+--
+2.7.4
+
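
For readers skimming the change outside of the kernel tree, below is a minimal, self-contained userspace sketch of the pattern the patch introduces: a per-bank list of threshold blocks built ahead of time, a small helper that returns early unless a block has actually overflowed, and an interrupt-style loop that checks the head block first and then walks the rest of the list. Every name here (fake_block, read_block_status, bank_blocks, the status bit layout) is an illustrative stand-in, not the kernel API; rdmsr_safe(), threshold_restart_bank() and struct threshold_block are only approximated.

/*
 * Simplified userspace model of the reworked handler.  Not kernel code:
 * fake_block stands in for struct threshold_block, read_block_status()
 * for rdmsr_safe(), and clearing the overflow bit stands in for
 * threshold_restart_bank().
 */
#include <stdbool.h>
#include <stdio.h>

#define MASK_OVERFLOW	(1u << 30)	/* stand-in for MASK_OVERFLOW_HI */
#define NUM_BANKS	2

struct fake_block {
	unsigned int bank;
	unsigned int status;		/* stand-in for the block's MSR contents */
	struct fake_block *next;	/* stand-in for the ->miscj list */
};

/* The first block of each bank doubles as the head of that bank's list. */
static struct fake_block *bank_blocks[NUM_BANKS];

/* Stand-in for rdmsr_safe(): false would mean the "MSR read" failed. */
static bool read_block_status(struct fake_block *b, unsigned int *status)
{
	*status = b->status;
	return true;
}

/* Mirrors log_and_reset_block(): bail out early, otherwise log and reset. */
static void log_and_reset_block(struct fake_block *b)
{
	unsigned int status;

	if (!b)
		return;

	if (!read_block_status(b, &status))
		return;

	if (!(status & MASK_OVERFLOW))
		return;

	printf("bank %u: threshold overflow, logging and resetting\n", b->bank);
	b->status &= ~MASK_OVERFLOW;	/* stand-in for threshold_restart_bank() */
}

/* Mirrors the reworked amd_threshold_interrupt() loop. */
static void threshold_interrupt(void)
{
	for (unsigned int bank = 0; bank < NUM_BANKS; bank++) {
		struct fake_block *first = bank_blocks[bank];

		if (!first)
			continue;

		/* The first block is the list head: check it, then the rest. */
		log_and_reset_block(first);
		for (struct fake_block *b = first->next; b; b = b->next)
			log_and_reset_block(b);
	}
}

int main(void)
{
	static struct fake_block b0  = { .bank = 0, .status = MASK_OVERFLOW };
	static struct fake_block b1  = { .bank = 1, .status = 0 };
	static struct fake_block b1b = { .bank = 1, .status = MASK_OVERFLOW };

	bank_blocks[0] = &b0;
	b1.next = &b1b;
	bank_blocks[1] = &b1;

	threshold_interrupt();	/* only the overflowed blocks are reported */
	return 0;
}

The kernel version walks a struct list_head via list_for_each_entry_safe() rather than a hand-rolled next pointer, but the control flow is the same: early returns in the helper, then the head block first, then the remaining blocks on the list.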