From aaefca8e30d9df7a4ca13c9c8e135dd227b8ff19 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 18 Jul 2019 11:29:20 -0700 Subject: [PATCH 1/3] x86/mce: Don't check for the overflow bit on action optional machine checks We currently do not process SRAO (Software Recoverable Action Optional) machine checks if they are logged with the overflow bit set to 1 in the machine check bank status register. This is overly conservative. There are two cases where we could end up with an SRAO+OVER log based on the SDM volume 3 overwrite rules in "Table 15-8. Overwrite Rules for UC, CE, and UCR Errors" 1) First a corrected error is logged, then the SRAO error overwrites. The second error overwrites the first because uncorrected errors have a higher severity than corrected errors. 2) The SRAO error was logged first, followed by a corrected error. In this case the first error is retained in the bank. So in either case the machine check bank will contain the address of the SRAO error. So we can process that even if the overflow bit was set. Reported-by: Yongkai Wu Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190718182920.32621-1-tony.luck@intel.com --- arch/x86/kernel/cpu/mce/severity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 210f1f5db5f7..87bcdc6dc2f0 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -107,11 +107,11 @@ static struct severity { */ MCESEV( AO, "Action optional: memory scrubbing error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB) + SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB) ), MCESEV( AO, "Action optional: last level cache writeback error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB) + SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB) ), /* ignore OVER for UCNA */ From 0a54b809a3a2c31e1055b45b03708eb730222be1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Wed, 7 Aug 2019 18:59:29 -0400 Subject: [PATCH 2/3] RAS: Fix prototype warnings When building with C=2 and/or W=1, legitimate warnings are issued about missing prototypes: CHECK drivers/ras/debugfs.c drivers/ras/debugfs.c:4:15: warning: symbol 'ras_debugfs_dir' was not declared. Should it be static? drivers/ras/debugfs.c:8:5: warning: symbol 'ras_userspace_consumers' was not declared. Should it be static? drivers/ras/debugfs.c:38:12: warning: symbol 'ras_add_daemon_trace' was not declared. Should it be static? drivers/ras/debugfs.c:54:13: warning: symbol 'ras_debugfs_init' was not declared. Should it be static? 
CC drivers/ras/debugfs.o drivers/ras/debugfs.c:8:5: warning: no previous prototype for 'ras_userspace_consumers' [-Wmissing-prototypes] 8 | int ras_userspace_consumers(void) | ^~~~~~~~~~~~~~~~~~~~~~~ drivers/ras/debugfs.c:38:12: warning: no previous prototype for 'ras_add_daemon_trace' [-Wmissing-prototypes] 38 | int __init ras_add_daemon_trace(void) | ^~~~~~~~~~~~~~~~~~~~ drivers/ras/debugfs.c:54:13: warning: no previous prototype for 'ras_debugfs_init' [-Wmissing-prototypes] 54 | void __init ras_debugfs_init(void) | ^~~~~~~~~~~~~~~~ Provide the proper includes. [ bp: Take care of the same warnings for cec.c too. ] Signed-off-by: Valdis Kletnieks Signed-off-by: Borislav Petkov Cc: Tony Luck Cc: linux-edac@vger.kernel.org Cc: x86@kernel.org Link: http://lkml.kernel.org/r/7168.1565218769@turing-police --- drivers/ras/cec.c | 1 + drivers/ras/debugfs.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index 5d545806d930..c09cf55e2d20 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -4,6 +4,7 @@ */ #include #include +#include #include #include diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c index 9c1b717efad8..0d4f985afbf3 100644 --- a/drivers/ras/debugfs.c +++ b/drivers/ras/debugfs.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include +#include "debugfs.h" struct dentry *ras_debugfs_dir; From b6ff24f7b5101101ff897dfdde3f37924e676bc2 Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Thu, 8 Aug 2019 16:32:27 +0200 Subject: [PATCH 3/3] RAS: Build debugfs.o only when enabled in Kconfig In addition, the 0day bot reported this build error: >> drivers/ras/debugfs.c:10:5: error: redefinition of 'ras_userspace_consumers' int ras_userspace_consumers(void) ^~~~~~~~~~~~~~~~~~~~~~~ In file included from drivers/ras/debugfs.c:3:0: include/linux/ras.h:14:19: note: previous definition of 'ras_userspace_consumers' was here static inline int ras_userspace_consumers(void) { return 0; } 
^~~~~~~~~~~~~~~~~~~~~~~ for a riscv-specific .config where CONFIG_DEBUG_FS is not set. Fix all that by making debugfs.o depend on that define. [ bp: Rewrite commit message. ] Reported-by: kbuild test robot Signed-off-by: Valdis Kletnieks Signed-off-by: Borislav Petkov Cc: Tony Luck Cc: linux-edac@vger.kernel.org Cc: x86@kernel.org Link: http://lkml.kernel.org/r/7053.1565218556@turing-police --- drivers/ras/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index ef6777e14d3d..6f0404f50107 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_RAS) += ras.o debugfs.o +obj-$(CONFIG_RAS) += ras.o +obj-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_RAS_CEC) += cec.o