Mirror of https://github.com/Fishwaldo/build.git (synced 2025-07-23 21:39:02 +00:00)
AR-1 - Adding support category for distributions
AR-4 - Remove Allwinner legacy
AR-5 - Drop Udoo family and move Udoo board into newly created imx6 family
AR-9 - Rename sunxi-next to sunxi-legacy
AR-10 - Rename sunxi-dev to sunxi-current
AR-11 - Adding Radxa Rockpi S support
AR-13 - Rename rockchip64-default to rockchip64-legacy
AR-14 - Add rockchip64-current as mainline source
AR-15 - Drop Rockchip 4.19.y NEXT, current becomes 5.3.y
AR-16 - Rename RK3399 default to legacy
AR-17 - Rename Odroid XU4 next and default to legacy 4.14.y, add DEV 5.4.y
AR-18 - Add Odroid N2 current mainline
AR-19 - Move Odroid C1 to meson family
AR-20 - Rename mvebu64-default to mvebu64-legacy
AR-21 - Rename mvebu-default to mvebu-legacy
AR-22 - Rename mvebu-next to mvebu-current
AR-23 - Drop meson64 default and next, current becomes former DEV 5.3.y
AR-24 - Drop cubox family and move Cubox/Hummingboard boards under imx6
AR-26 - Adjust motd
AR-27 - Enabling distribution release status
AR-28 - Added new GCC compilers
AR-29 - Implementing Ubuntu Eoan
AR-30 - Add desktop packages per board or family
AR-31 - Remove (Ubuntu/Debian) distribution name from image filename
AR-32 - Move arch configs from configuration.sh to separate arm64 and armhf config files
AR-33 - Revision numbers for beta builds changed to day_in_the_year
AR-34 - Patches support linked patches
AR-35 - Break meson64 family into gxbb and gxl
AR-36 - Add Nanopineo2 Black
AR-38 - Upgrade option from old branches to new one via armbian-config
AR-41 - Show full timezone info
AR-43 - Merge Odroid N2 to meson64
AR-44 - Enable FORCE_BOOTSCRIPT_UPDATE for all builds
7052 lines, 224 KiB
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 99a08722124d..5f3d58142600 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3994,9 +3994,13 @@

spectre_v2= [X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
+ The default operation protects the kernel from
+ user space attacks.

- on - unconditionally enable
- off - unconditionally disable
+ on - unconditionally enable, implies
+ spectre_v2_user=on
+ off - unconditionally disable, implies
+ spectre_v2_user=off
auto - kernel detects whether your CPU model is
vulnerable

@@ -4006,6 +4010,12 @@
CONFIG_RETPOLINE configuration option, and the
compiler with which the kernel was built.

+ Selecting 'on' will also enable the mitigation
+ against user space to user space task attacks.
+
+ Selecting 'off' will disable both the kernel and
+ the user space protections.
+
Specific mitigations can also be selected manually:

retpoline - replace indirect branches
@@ -4015,6 +4025,48 @@
Not specifying this option is equivalent to
spectre_v2=auto.

+ spectre_v2_user=
+ [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability between
+ user space tasks
+
+ on - Unconditionally enable mitigations. Is
+ enforced by spectre_v2=on
+
+ off - Unconditionally disable mitigations. Is
+ enforced by spectre_v2=off
+
+ prctl - Indirect branch speculation is enabled,
+ but mitigation can be enabled via prctl
+ per thread. The mitigation control state
+ is inherited on fork.
+
+ prctl,ibpb
+ - Like "prctl" above, but only STIBP is
+ controlled per thread. IBPB is issued
+ always when switching between different user
+ space processes.
+
+ seccomp
+ - Same as "prctl" above, but all seccomp
+ threads will enable the mitigation unless
+ they explicitly opt out.
+
+ seccomp,ibpb
+ - Like "seccomp" above, but only STIBP is
+ controlled per thread. IBPB is issued
+ always when switching between different
+ user space processes.
+
+ auto - Kernel selects the mitigation depending on
+ the available CPU features and vulnerability.
+
+ Default mitigation:
+ If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
+
+ Not specifying this option is equivalent to
+ spectre_v2_user=auto.
+
spec_store_bypass_disable=
[HW] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)
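
For orientation only (this paragraph and the lines below are not part of the patch): the switches documented above are ordinary kernel command line parameters. A minimal sketch of how they might be passed on an Armbian-style board, assuming the usual extraargs mechanism in /boot/armbianEnv.txt (or GRUB_CMDLINE_LINUX on a GRUB-based system):

extraargs=spectre_v2=on
extraargs=spectre_v2_user=prctl,ibpb

The first line forces both kernel and user space mitigations (spectre_v2=on implies spectre_v2_user=on, as documented above); the second leaves the kernel-side selection on auto and chooses per-thread STIBP control with always-on IBPB between user space processes.
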
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 32f3d55c54b7..c4dbe6f7cdae 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -92,3 +92,12 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes
+ (Mitigate Spectre V2 style attacks against user processes)
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
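
The prctl() invocations listed above are the interface ordinary programs call directly. A minimal, illustrative C sketch (not part of the patch) of how a task could query and then permanently disable indirect branch speculation for itself; the fallback #defines are an assumption for older userspace headers that predate this interface, with values taken from the kernel's uapi prctl.h:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/prctl.h>

/* Fallbacks for userspace headers that predate this interface (assumed
 * values, matching include/uapi/linux/prctl.h). */
#ifndef PR_GET_SPECULATION_CTRL
#define PR_GET_SPECULATION_CTRL 52
#define PR_SET_SPECULATION_CTRL 53
#endif
#ifndef PR_SPEC_INDIRECT_BRANCH
#define PR_SPEC_INDIRECT_BRANCH 1
#endif
#ifndef PR_SPEC_FORCE_DISABLE
#define PR_SPEC_FORCE_DISABLE (1UL << 3)
#endif

int main(void)
{
	/* Query the indirect branch speculation state of this task. */
	int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

	if (state < 0) {
		/* e.g. ENODEV on kernels without this control */
		fprintf(stderr, "PR_GET_SPECULATION_CTRL: %s\n", strerror(errno));
		return 1;
	}
	printf("indirect branch speculation state: 0x%x\n", state);

	/* Irrevocably opt this task out of indirect branch speculation. */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
		  PR_SPEC_FORCE_DISABLE, 0, 0) < 0)
		fprintf(stderr, "PR_SET_SPECULATION_CTRL: %s\n", strerror(errno));

	return 0;
}

With spectre_v2_user=prctl or seccomp the force-disable sticks for the task and its children; under the strict or off modes the kernel refuses or ignores the request, as implemented in ib_prctl_set() later in this patch.
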
diff --git a/Makefile b/Makefile
index 58a248264090..572bd98d2344 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 14
-SUBLEVEL = 85
+SUBLEVEL = 86
EXTRAVERSION =
NAME = Petit Gorille

diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 6e5bd8974f22..679b839bb2eb 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
@@ -47,7 +47,11 @@
#include "rk3288.dtsi"

/ {
- memory@0 {
+ /*
+ * The default coreboot on veyron devices ignores memory@0 nodes
+ * and would instead create another memory node.
+ */
+ memory {
device_type = "memory";
reg = <0x0 0x0 0x0 0x80000000>;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
index 9a7486058455..eea7f8f070cf 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
@@ -130,7 +130,7 @@
};

&pcie0 {
- ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>;
+ ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
num-lanes = <4>;
pinctrl-names = "default";
pinctrl-0 = <&pcie_clkreqn_cpm>;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2af0af33362a..4f393eb9745f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -440,10 +440,6 @@ config RETPOLINE
branches. Requires a compiler with -mindirect-branch=thunk-extern
support for full protection. The kernel may run slower.

- Without compiler support, at least indirect branches in assembler
- code are eliminated. Since this includes the syscall entry path,
- it is not entirely pointless.
-
config INTEL_RDT
bool "Intel Resource Director Technology support"
default n
@@ -959,13 +955,7 @@ config NR_CPUS
approximately eight kilobytes to the kernel image.

config SCHED_SMT
- bool "SMT (Hyperthreading) scheduler support"
- depends on SMP
- ---help---
- SMT scheduler support improves the CPU scheduler's decision making
- when dealing with Intel Pentium 4 chips with HyperThreading at a
- cost of slightly increased overhead in some places. If unsure say
- N here.
+ def_bool y if SMP

config SCHED_MC
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1c4d012550ec..ce3658dd98e8 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -241,9 +241,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables

# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
-ifneq ($(RETPOLINE_CFLAGS),)
- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ifeq ($(RETPOLINE_CFLAGS),)
+ $(error You are building kernel with non-retpoline compiler, please update your compiler.)
endif
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
endif

archscripts: scripts_basic
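
Aside (not part of the patch): the hunk above turns a compiler without retpoline support from a silent downgrade into a hard build error. As a rough pre-build sanity check, assuming the GCC flags this series places into RETPOLINE_CFLAGS (-mindirect-branch=thunk-extern -mindirect-branch-register), one can try:

echo 'int main(void){return 0;}' | gcc -x c -mindirect-branch=thunk-extern -mindirect-branch-register -c -o /dev/null -

If that command fails, the toolchain predates retpoline support and needs updating before building this kernel (clang spells the equivalent flag -mretpoline-external-thunk).
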
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
|
|
index e5097dc85a06..7d12b0d1f359 100644
|
|
--- a/arch/x86/events/core.c
|
|
+++ b/arch/x86/events/core.c
|
|
@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
|
|
if (config == -1LL)
|
|
return -EINVAL;
|
|
|
|
- /*
|
|
- * Branch tracing:
|
|
- */
|
|
- if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
|
|
- !attr->freq && hwc->sample_period == 1) {
|
|
- /* BTS is not supported by this architecture. */
|
|
- if (!x86_pmu.bts_active)
|
|
- return -EOPNOTSUPP;
|
|
-
|
|
- /* BTS is currently only allowed for user-mode. */
|
|
- if (!attr->exclude_kernel)
|
|
- return -EOPNOTSUPP;
|
|
-
|
|
- /* disallow bts if conflicting events are present */
|
|
- if (x86_add_exclusive(x86_lbr_exclusive_lbr))
|
|
- return -EBUSY;
|
|
-
|
|
- event->destroy = hw_perf_lbr_event_destroy;
|
|
- }
|
|
-
|
|
hwc->config |= config;
|
|
|
|
return 0;
|
|
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
|
|
index 228732654cfe..7bb80151bfff 100644
|
|
--- a/arch/x86/events/intel/core.c
|
|
+++ b/arch/x86/events/intel/core.c
|
|
@@ -2345,16 +2345,7 @@ done:
|
|
static struct event_constraint *
|
|
intel_bts_constraints(struct perf_event *event)
|
|
{
|
|
- struct hw_perf_event *hwc = &event->hw;
|
|
- unsigned int hw_event, bts_event;
|
|
-
|
|
- if (event->attr.freq)
|
|
- return NULL;
|
|
-
|
|
- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
|
|
- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
|
|
-
|
|
- if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
|
|
+ if (unlikely(intel_pmu_has_bts(event)))
|
|
return &bts_constraint;
|
|
|
|
return NULL;
|
|
@@ -2973,10 +2964,47 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
|
|
return flags;
|
|
}
|
|
|
|
+static int intel_pmu_bts_config(struct perf_event *event)
|
|
+{
|
|
+ struct perf_event_attr *attr = &event->attr;
|
|
+
|
|
+ if (unlikely(intel_pmu_has_bts(event))) {
|
|
+ /* BTS is not supported by this architecture. */
|
|
+ if (!x86_pmu.bts_active)
|
|
+ return -EOPNOTSUPP;
|
|
+
|
|
+ /* BTS is currently only allowed for user-mode. */
|
|
+ if (!attr->exclude_kernel)
|
|
+ return -EOPNOTSUPP;
|
|
+
|
|
+ /* disallow bts if conflicting events are present */
|
|
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
|
|
+ return -EBUSY;
|
|
+
|
|
+ event->destroy = hw_perf_lbr_event_destroy;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int core_pmu_hw_config(struct perf_event *event)
|
|
+{
|
|
+ int ret = x86_pmu_hw_config(event);
|
|
+
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ return intel_pmu_bts_config(event);
|
|
+}
|
|
+
|
|
static int intel_pmu_hw_config(struct perf_event *event)
|
|
{
|
|
int ret = x86_pmu_hw_config(event);
|
|
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ ret = intel_pmu_bts_config(event);
|
|
if (ret)
|
|
return ret;
|
|
|
|
@@ -2999,7 +3027,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
|
/*
|
|
* BTS is set up earlier in this path, so don't account twice
|
|
*/
|
|
- if (!intel_pmu_has_bts(event)) {
|
|
+ if (!unlikely(intel_pmu_has_bts(event))) {
|
|
/* disallow lbr if conflicting events are present */
|
|
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
|
|
return -EBUSY;
|
|
@@ -3462,7 +3490,7 @@ static __initconst const struct x86_pmu core_pmu = {
|
|
.enable_all = core_pmu_enable_all,
|
|
.enable = core_pmu_enable_event,
|
|
.disable = x86_pmu_disable_event,
|
|
- .hw_config = x86_pmu_hw_config,
|
|
+ .hw_config = core_pmu_hw_config,
|
|
.schedule_events = x86_schedule_events,
|
|
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
|
|
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
|
|
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
|
|
index c6698c63c047..3c51fcaf1e34 100644
|
|
--- a/arch/x86/events/perf_event.h
|
|
+++ b/arch/x86/events/perf_event.h
|
|
@@ -850,11 +850,16 @@ static inline int amd_pmu_init(void)
|
|
|
|
static inline bool intel_pmu_has_bts(struct perf_event *event)
|
|
{
|
|
- if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
|
|
- !event->attr.freq && event->hw.sample_period == 1)
|
|
- return true;
|
|
+ struct hw_perf_event *hwc = &event->hw;
|
|
+ unsigned int hw_event, bts_event;
|
|
+
|
|
+ if (event->attr.freq)
|
|
+ return false;
|
|
+
|
|
+ hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
|
|
+ bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
|
|
|
|
- return false;
|
|
+ return hw_event == bts_event && hwc->sample_period == 1;
|
|
}
|
|
|
|
int intel_pmu_save_and_restart(struct perf_event *event);
|
|
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
|
|
index 673d6e988196..7d910827126b 100644
|
|
--- a/arch/x86/include/asm/cpufeatures.h
|
|
+++ b/arch/x86/include/asm/cpufeatures.h
|
|
@@ -284,7 +284,9 @@
|
|
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
|
|
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
|
|
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
|
|
+#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
|
|
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
|
|
+#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
|
|
|
|
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
|
|
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
|
|
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
|
|
index ef7eec669a1b..62c62d3eb0ff 100644
|
|
--- a/arch/x86/include/asm/msr-index.h
|
|
+++ b/arch/x86/include/asm/msr-index.h
|
|
@@ -41,9 +41,10 @@
|
|
|
|
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
|
|
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
|
|
-#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
|
|
+#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
|
|
+#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
|
|
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
|
|
-#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
|
|
+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
|
|
|
|
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
|
|
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
|
|
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
|
|
index 1b4132161c1f..a633767419f2 100644
|
|
--- a/arch/x86/include/asm/nospec-branch.h
|
|
+++ b/arch/x86/include/asm/nospec-branch.h
|
|
@@ -3,6 +3,8 @@
|
|
#ifndef _ASM_X86_NOSPEC_BRANCH_H_
|
|
#define _ASM_X86_NOSPEC_BRANCH_H_
|
|
|
|
+#include <linux/static_key.h>
|
|
+
|
|
#include <asm/alternative.h>
|
|
#include <asm/alternative-asm.h>
|
|
#include <asm/cpufeatures.h>
|
|
@@ -162,29 +164,35 @@
|
|
_ASM_PTR " 999b\n\t" \
|
|
".popsection\n\t"
|
|
|
|
-#if defined(CONFIG_X86_64) && defined(RETPOLINE)
|
|
+#ifdef CONFIG_RETPOLINE
|
|
+#ifdef CONFIG_X86_64
|
|
|
|
/*
|
|
- * Since the inline asm uses the %V modifier which is only in newer GCC,
|
|
- * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
|
|
+ * Inline asm uses the %V modifier which is only in newer GCC
|
|
+ * which is ensured when CONFIG_RETPOLINE is defined.
|
|
*/
|
|
# define CALL_NOSPEC \
|
|
ANNOTATE_NOSPEC_ALTERNATIVE \
|
|
- ALTERNATIVE( \
|
|
+ ALTERNATIVE_2( \
|
|
ANNOTATE_RETPOLINE_SAFE \
|
|
"call *%[thunk_target]\n", \
|
|
"call __x86_indirect_thunk_%V[thunk_target]\n", \
|
|
- X86_FEATURE_RETPOLINE)
|
|
+ X86_FEATURE_RETPOLINE, \
|
|
+ "lfence;\n" \
|
|
+ ANNOTATE_RETPOLINE_SAFE \
|
|
+ "call *%[thunk_target]\n", \
|
|
+ X86_FEATURE_RETPOLINE_AMD)
|
|
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
|
|
|
|
-#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
|
|
+#else /* CONFIG_X86_32 */
|
|
/*
|
|
* For i386 we use the original ret-equivalent retpoline, because
|
|
* otherwise we'll run out of registers. We don't care about CET
|
|
* here, anyway.
|
|
*/
|
|
# define CALL_NOSPEC \
|
|
- ALTERNATIVE( \
|
|
+ ANNOTATE_NOSPEC_ALTERNATIVE \
|
|
+ ALTERNATIVE_2( \
|
|
ANNOTATE_RETPOLINE_SAFE \
|
|
"call *%[thunk_target]\n", \
|
|
" jmp 904f;\n" \
|
|
@@ -199,9 +207,14 @@
|
|
" ret;\n" \
|
|
" .align 16\n" \
|
|
"904: call 901b;\n", \
|
|
- X86_FEATURE_RETPOLINE)
|
|
+ X86_FEATURE_RETPOLINE, \
|
|
+ "lfence;\n" \
|
|
+ ANNOTATE_RETPOLINE_SAFE \
|
|
+ "call *%[thunk_target]\n", \
|
|
+ X86_FEATURE_RETPOLINE_AMD)
|
|
|
|
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
|
|
+#endif
|
|
#else /* No retpoline for C / inline asm */
|
|
# define CALL_NOSPEC "call *%[thunk_target]\n"
|
|
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
|
|
@@ -210,14 +223,19 @@
|
|
/* The Spectre V2 mitigation variants */
|
|
enum spectre_v2_mitigation {
|
|
SPECTRE_V2_NONE,
|
|
- SPECTRE_V2_RETPOLINE_MINIMAL,
|
|
- SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
|
|
SPECTRE_V2_RETPOLINE_GENERIC,
|
|
SPECTRE_V2_RETPOLINE_AMD,
|
|
- SPECTRE_V2_IBRS,
|
|
SPECTRE_V2_IBRS_ENHANCED,
|
|
};
|
|
|
|
+/* The indirect branch speculation control variants */
|
|
+enum spectre_v2_user_mitigation {
|
|
+ SPECTRE_V2_USER_NONE,
|
|
+ SPECTRE_V2_USER_STRICT,
|
|
+ SPECTRE_V2_USER_PRCTL,
|
|
+ SPECTRE_V2_USER_SECCOMP,
|
|
+};
|
|
+
|
|
/* The Speculative Store Bypass disable variants */
|
|
enum ssb_mitigation {
|
|
SPEC_STORE_BYPASS_NONE,
|
|
@@ -295,6 +313,10 @@ do { \
|
|
preempt_enable(); \
|
|
} while (0)
|
|
|
|
+DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
|
|
+DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
|
|
+DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
|
|
+
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
/*
|
|
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
|
|
index ae7c2c5cd7f0..5393babc0598 100644
|
|
--- a/arch/x86/include/asm/spec-ctrl.h
|
|
+++ b/arch/x86/include/asm/spec-ctrl.h
|
|
@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
|
|
return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
|
|
}
|
|
|
|
+static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
|
|
+{
|
|
+ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
|
|
+ return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
|
|
+}
|
|
+
|
|
static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
|
|
{
|
|
BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
|
|
return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
|
|
}
|
|
|
|
+static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
|
|
+{
|
|
+ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
|
|
+ return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
|
|
+}
|
|
+
|
|
static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
|
|
{
|
|
return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
|
|
@@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void);
|
|
static inline void speculative_store_bypass_ht_init(void) { }
|
|
#endif
|
|
|
|
-extern void speculative_store_bypass_update(unsigned long tif);
|
|
-
|
|
-static inline void speculative_store_bypass_update_current(void)
|
|
-{
|
|
- speculative_store_bypass_update(current_thread_info()->flags);
|
|
-}
|
|
+extern void speculation_ctrl_update(unsigned long tif);
|
|
+extern void speculation_ctrl_update_current(void);
|
|
|
|
#endif
|
|
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
|
|
index 9b6df68d8fd1..12ef2b49d11b 100644
|
|
--- a/arch/x86/include/asm/switch_to.h
|
|
+++ b/arch/x86/include/asm/switch_to.h
|
|
@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
|
|
|
|
__visible struct task_struct *__switch_to(struct task_struct *prev,
|
|
struct task_struct *next);
|
|
-struct tss_struct;
|
|
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
|
- struct tss_struct *tss);
|
|
|
|
/* This runs runs on the previous thread's stack. */
|
|
static inline void prepare_switch_to(struct task_struct *prev,
|
|
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
|
|
index 95ff2d7f553f..bf9175d87844 100644
|
|
--- a/arch/x86/include/asm/thread_info.h
|
|
+++ b/arch/x86/include/asm/thread_info.h
|
|
@@ -81,10 +81,12 @@ struct thread_info {
|
|
#define TIF_SIGPENDING 2 /* signal pending */
|
|
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
|
|
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
|
|
-#define TIF_SSBD 5 /* Reduced data speculation */
|
|
+#define TIF_SSBD 5 /* Speculative store bypass disable */
|
|
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
|
|
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
|
|
#define TIF_SECCOMP 8 /* secure computing */
|
|
+#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
|
|
+#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
|
|
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
|
|
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
|
|
#define TIF_PATCH_PENDING 13 /* pending live patching update */
|
|
@@ -112,6 +114,8 @@ struct thread_info {
|
|
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
|
|
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
|
|
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
|
|
+#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
|
|
+#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
|
|
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
|
|
#define _TIF_UPROBE (1 << TIF_UPROBE)
|
|
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
|
|
@@ -147,8 +151,18 @@ struct thread_info {
|
|
_TIF_FSCHECK)
|
|
|
|
/* flags to check in __switch_to() */
|
|
-#define _TIF_WORK_CTXSW \
|
|
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
|
|
+#define _TIF_WORK_CTXSW_BASE \
|
|
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
|
|
+ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
|
|
+
|
|
+/*
|
|
+ * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
|
|
+ */
|
|
+#ifdef CONFIG_SMP
|
|
+# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
|
|
+#else
|
|
+# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
|
|
+#endif
|
|
|
|
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
|
|
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
|
|
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
|
|
index 2501be609b82..e31040333f0c 100644
|
|
--- a/arch/x86/include/asm/tlbflush.h
|
|
+++ b/arch/x86/include/asm/tlbflush.h
|
|
@@ -185,10 +185,14 @@ struct tlb_state {
|
|
|
|
#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
|
|
|
|
+ /* Last user mm for optimizing IBPB */
|
|
+ union {
|
|
+ struct mm_struct *last_user_mm;
|
|
+ unsigned long last_user_mm_ibpb;
|
|
+ };
|
|
+
|
|
u16 loaded_mm_asid;
|
|
u16 next_asid;
|
|
- /* last user mm's ctx id */
|
|
- u64 last_ctx_id;
|
|
|
|
/*
|
|
* We can be in one of several states:
|
|
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
|
|
index dda741bd5789..7e03515662c0 100644
|
|
--- a/arch/x86/kernel/cpu/amd.c
|
|
+++ b/arch/x86/kernel/cpu/amd.c
|
|
@@ -554,7 +554,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
|
|
nodes_per_socket = ((value >> 3) & 7) + 1;
|
|
}
|
|
|
|
- if (c->x86 >= 0x15 && c->x86 <= 0x17) {
|
|
+ if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
|
|
+ !boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
|
|
+ c->x86 >= 0x15 && c->x86 <= 0x17) {
|
|
unsigned int bit;
|
|
|
|
switch (c->x86) {
|
|
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
|
|
index e92aedd93806..f7a6d6203e13 100644
|
|
--- a/arch/x86/kernel/cpu/bugs.c
|
|
+++ b/arch/x86/kernel/cpu/bugs.c
|
|
@@ -14,6 +14,7 @@
|
|
#include <linux/module.h>
|
|
#include <linux/nospec.h>
|
|
#include <linux/prctl.h>
|
|
+#include <linux/sched/smt.h>
|
|
|
|
#include <asm/spec-ctrl.h>
|
|
#include <asm/cmdline.h>
|
|
@@ -34,12 +35,10 @@ static void __init spectre_v2_select_mitigation(void);
|
|
static void __init ssb_select_mitigation(void);
|
|
static void __init l1tf_select_mitigation(void);
|
|
|
|
-/*
|
|
- * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
|
|
- * writes to SPEC_CTRL contain whatever reserved bits have been set.
|
|
- */
|
|
-u64 __ro_after_init x86_spec_ctrl_base;
|
|
+/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
|
|
+u64 x86_spec_ctrl_base;
|
|
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
|
|
+static DEFINE_MUTEX(spec_ctrl_mutex);
|
|
|
|
/*
|
|
* The vendor and possibly platform specific bits which can be modified in
|
|
@@ -54,6 +53,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
|
|
u64 __ro_after_init x86_amd_ls_cfg_base;
|
|
u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
|
|
|
|
+/* Control conditional STIPB in switch_to() */
|
|
+DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
|
|
+/* Control conditional IBPB in switch_mm() */
|
|
+DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
|
|
+/* Control unconditional IBPB in switch_mm() */
|
|
+DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
|
|
+
|
|
void __init check_bugs(void)
|
|
{
|
|
identify_boot_cpu();
|
|
@@ -124,31 +130,6 @@ void __init check_bugs(void)
|
|
#endif
|
|
}
|
|
|
|
-/* The kernel command line selection */
|
|
-enum spectre_v2_mitigation_cmd {
|
|
- SPECTRE_V2_CMD_NONE,
|
|
- SPECTRE_V2_CMD_AUTO,
|
|
- SPECTRE_V2_CMD_FORCE,
|
|
- SPECTRE_V2_CMD_RETPOLINE,
|
|
- SPECTRE_V2_CMD_RETPOLINE_GENERIC,
|
|
- SPECTRE_V2_CMD_RETPOLINE_AMD,
|
|
-};
|
|
-
|
|
-static const char *spectre_v2_strings[] = {
|
|
- [SPECTRE_V2_NONE] = "Vulnerable",
|
|
- [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
|
|
- [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
|
|
- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
|
|
- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
|
|
- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
|
|
-};
|
|
-
|
|
-#undef pr_fmt
|
|
-#define pr_fmt(fmt) "Spectre V2 : " fmt
|
|
-
|
|
-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
|
|
- SPECTRE_V2_NONE;
|
|
-
|
|
void
|
|
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
|
|
{
|
|
@@ -166,9 +147,14 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
|
|
guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
|
|
|
|
/* SSBD controlled in MSR_SPEC_CTRL */
|
|
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
|
|
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
|
|
+ static_cpu_has(X86_FEATURE_AMD_SSBD))
|
|
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
|
|
|
|
+ /* Conditional STIBP enabled? */
|
|
+ if (static_branch_unlikely(&switch_to_cond_stibp))
|
|
+ hostval |= stibp_tif_to_spec_ctrl(ti->flags);
|
|
+
|
|
if (hostval != guestval) {
|
|
msrval = setguest ? guestval : hostval;
|
|
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
|
|
@@ -202,7 +188,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
|
|
tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
|
|
ssbd_spec_ctrl_to_tif(hostval);
|
|
|
|
- speculative_store_bypass_update(tif);
|
|
+ speculation_ctrl_update(tif);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
|
|
@@ -217,6 +203,15 @@ static void x86_amd_ssb_disable(void)
|
|
wrmsrl(MSR_AMD64_LS_CFG, msrval);
|
|
}
|
|
|
|
+#undef pr_fmt
|
|
+#define pr_fmt(fmt) "Spectre V2 : " fmt
|
|
+
|
|
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
|
|
+ SPECTRE_V2_NONE;
|
|
+
|
|
+static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
|
|
+ SPECTRE_V2_USER_NONE;
|
|
+
|
|
#ifdef RETPOLINE
|
|
static bool spectre_v2_bad_module;
|
|
|
|
@@ -238,67 +233,217 @@ static inline const char *spectre_v2_module_string(void)
|
|
static inline const char *spectre_v2_module_string(void) { return ""; }
|
|
#endif
|
|
|
|
-static void __init spec2_print_if_insecure(const char *reason)
|
|
+static inline bool match_option(const char *arg, int arglen, const char *opt)
|
|
{
|
|
- if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
|
|
- pr_info("%s selected on command line.\n", reason);
|
|
+ int len = strlen(opt);
|
|
+
|
|
+ return len == arglen && !strncmp(arg, opt, len);
|
|
}
|
|
|
|
-static void __init spec2_print_if_secure(const char *reason)
|
|
+/* The kernel command line selection for spectre v2 */
|
|
+enum spectre_v2_mitigation_cmd {
|
|
+ SPECTRE_V2_CMD_NONE,
|
|
+ SPECTRE_V2_CMD_AUTO,
|
|
+ SPECTRE_V2_CMD_FORCE,
|
|
+ SPECTRE_V2_CMD_RETPOLINE,
|
|
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
|
|
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
|
|
+};
|
|
+
|
|
+enum spectre_v2_user_cmd {
|
|
+ SPECTRE_V2_USER_CMD_NONE,
|
|
+ SPECTRE_V2_USER_CMD_AUTO,
|
|
+ SPECTRE_V2_USER_CMD_FORCE,
|
|
+ SPECTRE_V2_USER_CMD_PRCTL,
|
|
+ SPECTRE_V2_USER_CMD_PRCTL_IBPB,
|
|
+ SPECTRE_V2_USER_CMD_SECCOMP,
|
|
+ SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
|
|
+};
|
|
+
|
|
+static const char * const spectre_v2_user_strings[] = {
|
|
+ [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
|
|
+ [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
|
|
+ [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
|
|
+ [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
|
|
+};
|
|
+
|
|
+static const struct {
|
|
+ const char *option;
|
|
+ enum spectre_v2_user_cmd cmd;
|
|
+ bool secure;
|
|
+} v2_user_options[] __initdata = {
|
|
+ { "auto", SPECTRE_V2_USER_CMD_AUTO, false },
|
|
+ { "off", SPECTRE_V2_USER_CMD_NONE, false },
|
|
+ { "on", SPECTRE_V2_USER_CMD_FORCE, true },
|
|
+ { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
|
|
+ { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
|
|
+ { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
|
|
+ { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
|
|
+};
|
|
+
|
|
+static void __init spec_v2_user_print_cond(const char *reason, bool secure)
|
|
{
|
|
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
|
|
- pr_info("%s selected on command line.\n", reason);
|
|
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
|
|
+ pr_info("spectre_v2_user=%s forced on command line.\n", reason);
|
|
}
|
|
|
|
-static inline bool retp_compiler(void)
|
|
+static enum spectre_v2_user_cmd __init
|
|
+spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
|
|
{
|
|
- return __is_defined(RETPOLINE);
|
|
+ char arg[20];
|
|
+ int ret, i;
|
|
+
|
|
+ switch (v2_cmd) {
|
|
+ case SPECTRE_V2_CMD_NONE:
|
|
+ return SPECTRE_V2_USER_CMD_NONE;
|
|
+ case SPECTRE_V2_CMD_FORCE:
|
|
+ return SPECTRE_V2_USER_CMD_FORCE;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
|
|
+ arg, sizeof(arg));
|
|
+ if (ret < 0)
|
|
+ return SPECTRE_V2_USER_CMD_AUTO;
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
|
|
+ if (match_option(arg, ret, v2_user_options[i].option)) {
|
|
+ spec_v2_user_print_cond(v2_user_options[i].option,
|
|
+ v2_user_options[i].secure);
|
|
+ return v2_user_options[i].cmd;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
|
|
+ return SPECTRE_V2_USER_CMD_AUTO;
|
|
}
|
|
|
|
-static inline bool match_option(const char *arg, int arglen, const char *opt)
|
|
+static void __init
|
|
+spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
|
|
{
|
|
- int len = strlen(opt);
|
|
+ enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
|
|
+ bool smt_possible = IS_ENABLED(CONFIG_SMP);
|
|
+ enum spectre_v2_user_cmd cmd;
|
|
|
|
- return len == arglen && !strncmp(arg, opt, len);
|
|
+ if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
|
|
+ return;
|
|
+
|
|
+ if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
|
|
+ cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
|
|
+ smt_possible = false;
|
|
+
|
|
+ cmd = spectre_v2_parse_user_cmdline(v2_cmd);
|
|
+ switch (cmd) {
|
|
+ case SPECTRE_V2_USER_CMD_NONE:
|
|
+ goto set_mode;
|
|
+ case SPECTRE_V2_USER_CMD_FORCE:
|
|
+ mode = SPECTRE_V2_USER_STRICT;
|
|
+ break;
|
|
+ case SPECTRE_V2_USER_CMD_PRCTL:
|
|
+ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
|
|
+ mode = SPECTRE_V2_USER_PRCTL;
|
|
+ break;
|
|
+ case SPECTRE_V2_USER_CMD_AUTO:
|
|
+ case SPECTRE_V2_USER_CMD_SECCOMP:
|
|
+ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
|
|
+ if (IS_ENABLED(CONFIG_SECCOMP))
|
|
+ mode = SPECTRE_V2_USER_SECCOMP;
|
|
+ else
|
|
+ mode = SPECTRE_V2_USER_PRCTL;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* Initialize Indirect Branch Prediction Barrier */
|
|
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
|
|
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
|
|
+
|
|
+ switch (cmd) {
|
|
+ case SPECTRE_V2_USER_CMD_FORCE:
|
|
+ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
|
|
+ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
|
|
+ static_branch_enable(&switch_mm_always_ibpb);
|
|
+ break;
|
|
+ case SPECTRE_V2_USER_CMD_PRCTL:
|
|
+ case SPECTRE_V2_USER_CMD_AUTO:
|
|
+ case SPECTRE_V2_USER_CMD_SECCOMP:
|
|
+ static_branch_enable(&switch_mm_cond_ibpb);
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
|
|
+ static_key_enabled(&switch_mm_always_ibpb) ?
|
|
+ "always-on" : "conditional");
|
|
+ }
|
|
+
|
|
+ /* If enhanced IBRS is enabled no STIPB required */
|
|
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * If SMT is not possible or STIBP is not available clear the STIPB
|
|
+ * mode.
|
|
+ */
|
|
+ if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
|
|
+ mode = SPECTRE_V2_USER_NONE;
|
|
+set_mode:
|
|
+ spectre_v2_user = mode;
|
|
+ /* Only print the STIBP mode when SMT possible */
|
|
+ if (smt_possible)
|
|
+ pr_info("%s\n", spectre_v2_user_strings[mode]);
|
|
}
|
|
|
|
+static const char * const spectre_v2_strings[] = {
|
|
+ [SPECTRE_V2_NONE] = "Vulnerable",
|
|
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
|
|
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
|
|
+ [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
|
|
+};
|
|
+
|
|
static const struct {
|
|
const char *option;
|
|
enum spectre_v2_mitigation_cmd cmd;
|
|
bool secure;
|
|
-} mitigation_options[] = {
|
|
- { "off", SPECTRE_V2_CMD_NONE, false },
|
|
- { "on", SPECTRE_V2_CMD_FORCE, true },
|
|
- { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
|
|
- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
|
|
- { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
|
|
- { "auto", SPECTRE_V2_CMD_AUTO, false },
|
|
+} mitigation_options[] __initdata = {
|
|
+ { "off", SPECTRE_V2_CMD_NONE, false },
|
|
+ { "on", SPECTRE_V2_CMD_FORCE, true },
|
|
+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
|
|
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
|
|
+ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
|
|
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
|
|
};
|
|
|
|
+static void __init spec_v2_print_cond(const char *reason, bool secure)
|
|
+{
|
|
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
|
|
+ pr_info("%s selected on command line.\n", reason);
|
|
+}
|
|
+
|
|
static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
|
|
{
|
|
+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
|
|
char arg[20];
|
|
int ret, i;
|
|
- enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
|
|
|
|
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
|
|
return SPECTRE_V2_CMD_NONE;
|
|
- else {
|
|
- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
|
|
- if (ret < 0)
|
|
- return SPECTRE_V2_CMD_AUTO;
|
|
|
|
- for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
|
|
- if (!match_option(arg, ret, mitigation_options[i].option))
|
|
- continue;
|
|
- cmd = mitigation_options[i].cmd;
|
|
- break;
|
|
- }
|
|
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
|
|
+ if (ret < 0)
|
|
+ return SPECTRE_V2_CMD_AUTO;
|
|
|
|
- if (i >= ARRAY_SIZE(mitigation_options)) {
|
|
- pr_err("unknown option (%s). Switching to AUTO select\n", arg);
|
|
- return SPECTRE_V2_CMD_AUTO;
|
|
- }
|
|
+ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
|
|
+ if (!match_option(arg, ret, mitigation_options[i].option))
|
|
+ continue;
|
|
+ cmd = mitigation_options[i].cmd;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (i >= ARRAY_SIZE(mitigation_options)) {
|
|
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
|
|
+ return SPECTRE_V2_CMD_AUTO;
|
|
}
|
|
|
|
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
|
|
@@ -315,11 +460,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
|
|
return SPECTRE_V2_CMD_AUTO;
|
|
}
|
|
|
|
- if (mitigation_options[i].secure)
|
|
- spec2_print_if_secure(mitigation_options[i].option);
|
|
- else
|
|
- spec2_print_if_insecure(mitigation_options[i].option);
|
|
-
|
|
+ spec_v2_print_cond(mitigation_options[i].option,
|
|
+ mitigation_options[i].secure);
|
|
return cmd;
|
|
}
|
|
|
|
@@ -375,14 +517,12 @@ retpoline_auto:
|
|
pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
|
|
goto retpoline_generic;
|
|
}
|
|
- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
|
|
- SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
|
|
+ mode = SPECTRE_V2_RETPOLINE_AMD;
|
|
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
|
|
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
|
|
} else {
|
|
retpoline_generic:
|
|
- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
|
|
- SPECTRE_V2_RETPOLINE_MINIMAL;
|
|
+ mode = SPECTRE_V2_RETPOLINE_GENERIC;
|
|
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
|
|
}
|
|
|
|
@@ -401,12 +541,6 @@ specv2_set_mode:
|
|
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
|
|
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
|
|
|
|
- /* Initialize Indirect Branch Prediction Barrier if supported */
|
|
- if (boot_cpu_has(X86_FEATURE_IBPB)) {
|
|
- setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
|
|
- pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
|
|
- }
|
|
-
|
|
/*
|
|
* Retpoline means the kernel is safe because it has no indirect
|
|
* branches. Enhanced IBRS protects firmware too, so, enable restricted
|
|
@@ -422,6 +556,66 @@ specv2_set_mode:
|
|
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
|
|
pr_info("Enabling Restricted Speculation for firmware calls\n");
|
|
}
|
|
+
|
|
+ /* Set up IBPB and STIBP depending on the general spectre V2 command */
|
|
+ spectre_v2_user_select_mitigation(cmd);
|
|
+
|
|
+ /* Enable STIBP if appropriate */
|
|
+ arch_smt_update();
|
|
+}
|
|
+
|
|
+static void update_stibp_msr(void * __unused)
|
|
+{
|
|
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
|
|
+}
|
|
+
|
|
+/* Update x86_spec_ctrl_base in case SMT state changed. */
|
|
+static void update_stibp_strict(void)
|
|
+{
|
|
+ u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
|
|
+
|
|
+ if (sched_smt_active())
|
|
+ mask |= SPEC_CTRL_STIBP;
|
|
+
|
|
+ if (mask == x86_spec_ctrl_base)
|
|
+ return;
|
|
+
|
|
+ pr_info("Update user space SMT mitigation: STIBP %s\n",
|
|
+ mask & SPEC_CTRL_STIBP ? "always-on" : "off");
|
|
+ x86_spec_ctrl_base = mask;
|
|
+ on_each_cpu(update_stibp_msr, NULL, 1);
|
|
+}
|
|
+
|
|
+/* Update the static key controlling the evaluation of TIF_SPEC_IB */
|
|
+static void update_indir_branch_cond(void)
|
|
+{
|
|
+ if (sched_smt_active())
|
|
+ static_branch_enable(&switch_to_cond_stibp);
|
|
+ else
|
|
+ static_branch_disable(&switch_to_cond_stibp);
|
|
+}
|
|
+
|
|
+void arch_smt_update(void)
|
|
+{
|
|
+ /* Enhanced IBRS implies STIBP. No update required. */
|
|
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
|
|
+ return;
|
|
+
|
|
+ mutex_lock(&spec_ctrl_mutex);
|
|
+
|
|
+ switch (spectre_v2_user) {
|
|
+ case SPECTRE_V2_USER_NONE:
|
|
+ break;
|
|
+ case SPECTRE_V2_USER_STRICT:
|
|
+ update_stibp_strict();
|
|
+ break;
|
|
+ case SPECTRE_V2_USER_PRCTL:
|
|
+ case SPECTRE_V2_USER_SECCOMP:
|
|
+ update_indir_branch_cond();
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ mutex_unlock(&spec_ctrl_mutex);
|
|
}
|
|
|
|
#undef pr_fmt
|
|
@@ -438,7 +632,7 @@ enum ssb_mitigation_cmd {
|
|
SPEC_STORE_BYPASS_CMD_SECCOMP,
|
|
};
|
|
|
|
-static const char *ssb_strings[] = {
|
|
+static const char * const ssb_strings[] = {
|
|
[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
|
|
[SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
|
|
[SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
|
|
@@ -448,7 +642,7 @@ static const char *ssb_strings[] = {
|
|
static const struct {
|
|
const char *option;
|
|
enum ssb_mitigation_cmd cmd;
|
|
-} ssb_mitigation_options[] = {
|
|
+} ssb_mitigation_options[] __initdata = {
|
|
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
|
|
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
|
|
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
|
|
@@ -532,18 +726,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
|
|
if (mode == SPEC_STORE_BYPASS_DISABLE) {
|
|
setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
|
|
/*
|
|
- * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
|
|
- * a completely different MSR and bit dependent on family.
|
|
+ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
|
|
+ * use a completely different MSR and bit dependent on family.
|
|
*/
|
|
- switch (boot_cpu_data.x86_vendor) {
|
|
- case X86_VENDOR_INTEL:
|
|
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
|
|
+ !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
|
|
+ x86_amd_ssb_disable();
|
|
+ } else {
|
|
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
|
|
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
|
|
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
|
|
- break;
|
|
- case X86_VENDOR_AMD:
|
|
- x86_amd_ssb_disable();
|
|
- break;
|
|
}
|
|
}
|
|
|
|
@@ -561,10 +753,25 @@ static void ssb_select_mitigation(void)
|
|
#undef pr_fmt
|
|
#define pr_fmt(fmt) "Speculation prctl: " fmt
|
|
|
|
-static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
|
|
+static void task_update_spec_tif(struct task_struct *tsk)
|
|
{
|
|
- bool update;
|
|
+ /* Force the update of the real TIF bits */
|
|
+ set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);
|
|
|
|
+ /*
|
|
+ * Immediately update the speculation control MSRs for the current
|
|
+ * task, but for a non-current task delay setting the CPU
|
|
+ * mitigation until it is scheduled next.
|
|
+ *
|
|
+ * This can only happen for SECCOMP mitigation. For PRCTL it's
|
|
+ * always the current task.
|
|
+ */
|
|
+ if (tsk == current)
|
|
+ speculation_ctrl_update_current();
|
|
+}
|
|
+
|
|
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
|
|
+{
|
|
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
|
|
ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
|
|
return -ENXIO;
|
|
@@ -575,28 +782,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
|
|
if (task_spec_ssb_force_disable(task))
|
|
return -EPERM;
|
|
task_clear_spec_ssb_disable(task);
|
|
- update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
|
|
+ task_update_spec_tif(task);
|
|
break;
|
|
case PR_SPEC_DISABLE:
|
|
task_set_spec_ssb_disable(task);
|
|
- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
|
|
+ task_update_spec_tif(task);
|
|
break;
|
|
case PR_SPEC_FORCE_DISABLE:
|
|
task_set_spec_ssb_disable(task);
|
|
task_set_spec_ssb_force_disable(task);
|
|
- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
|
|
+ task_update_spec_tif(task);
|
|
break;
|
|
default:
|
|
return -ERANGE;
|
|
}
|
|
+ return 0;
|
|
+}
|
|
|
|
- /*
|
|
- * If being set on non-current task, delay setting the CPU
|
|
- * mitigation until it is next scheduled.
|
|
- */
|
|
- if (task == current && update)
|
|
- speculative_store_bypass_update_current();
|
|
-
|
|
+static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
|
|
+{
|
|
+ switch (ctrl) {
|
|
+ case PR_SPEC_ENABLE:
|
|
+ if (spectre_v2_user == SPECTRE_V2_USER_NONE)
|
|
+ return 0;
|
|
+ /*
|
|
+ * Indirect branch speculation is always disabled in strict
|
|
+ * mode.
|
|
+ */
|
|
+ if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
|
|
+ return -EPERM;
|
|
+ task_clear_spec_ib_disable(task);
|
|
+ task_update_spec_tif(task);
|
|
+ break;
|
|
+ case PR_SPEC_DISABLE:
|
|
+ case PR_SPEC_FORCE_DISABLE:
|
|
+ /*
|
|
+ * Indirect branch speculation is always allowed when
|
|
+ * mitigation is force disabled.
|
|
+ */
|
|
+ if (spectre_v2_user == SPECTRE_V2_USER_NONE)
|
|
+ return -EPERM;
|
|
+ if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
|
|
+ return 0;
|
|
+ task_set_spec_ib_disable(task);
|
|
+ if (ctrl == PR_SPEC_FORCE_DISABLE)
|
|
+ task_set_spec_ib_force_disable(task);
|
|
+ task_update_spec_tif(task);
|
|
+ break;
|
|
+ default:
|
|
+ return -ERANGE;
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -606,6 +841,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
|
|
switch (which) {
|
|
case PR_SPEC_STORE_BYPASS:
|
|
return ssb_prctl_set(task, ctrl);
|
|
+ case PR_SPEC_INDIRECT_BRANCH:
|
|
+ return ib_prctl_set(task, ctrl);
|
|
default:
|
|
return -ENODEV;
|
|
}
|
|
@@ -616,6 +853,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
|
|
{
|
|
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
|
|
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
|
|
+ if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
|
|
+ ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
|
|
}
|
|
#endif
|
|
|
|
@@ -638,11 +877,35 @@ static int ssb_prctl_get(struct task_struct *task)
|
|
}
|
|
}
|
|
|
|
+static int ib_prctl_get(struct task_struct *task)
|
|
+{
|
|
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
|
|
+ return PR_SPEC_NOT_AFFECTED;
|
|
+
|
|
+ switch (spectre_v2_user) {
|
|
+ case SPECTRE_V2_USER_NONE:
|
|
+ return PR_SPEC_ENABLE;
|
|
+ case SPECTRE_V2_USER_PRCTL:
|
|
+ case SPECTRE_V2_USER_SECCOMP:
|
|
+ if (task_spec_ib_force_disable(task))
|
|
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
|
|
+ if (task_spec_ib_disable(task))
|
|
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
|
|
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
|
|
+ case SPECTRE_V2_USER_STRICT:
|
|
+ return PR_SPEC_DISABLE;
|
|
+ default:
|
|
+ return PR_SPEC_NOT_AFFECTED;
|
|
+ }
|
|
+}
|
|
+
|
|
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
|
|
{
|
|
switch (which) {
|
|
case PR_SPEC_STORE_BYPASS:
|
|
return ssb_prctl_get(task);
|
|
+ case PR_SPEC_INDIRECT_BRANCH:
|
|
+ return ib_prctl_get(task);
|
|
default:
|
|
return -ENODEV;
|
|
}
|
|
@@ -780,7 +1043,7 @@ early_param("l1tf", l1tf_cmdline);
|
|
#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
|
|
|
|
#if IS_ENABLED(CONFIG_KVM_INTEL)
|
|
-static const char *l1tf_vmx_states[] = {
|
|
+static const char * const l1tf_vmx_states[] = {
|
|
[VMENTER_L1D_FLUSH_AUTO] = "auto",
|
|
[VMENTER_L1D_FLUSH_NEVER] = "vulnerable",
|
|
[VMENTER_L1D_FLUSH_COND] = "conditional cache flushes",
|
|
@@ -796,13 +1059,14 @@ static ssize_t l1tf_show_state(char *buf)
|
|
|
|
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
|
|
(l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
|
|
- cpu_smt_control == CPU_SMT_ENABLED))
|
|
+ sched_smt_active())) {
|
|
return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
|
|
l1tf_vmx_states[l1tf_vmx_mitigation]);
|
|
+ }
|
|
|
|
return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
|
|
l1tf_vmx_states[l1tf_vmx_mitigation],
|
|
- cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled");
|
|
+ sched_smt_active() ? "vulnerable" : "disabled");
|
|
}
|
|
#else
|
|
static ssize_t l1tf_show_state(char *buf)
|
|
@@ -811,6 +1075,36 @@ static ssize_t l1tf_show_state(char *buf)
|
|
}
|
|
#endif
|
|
|
|
+static char *stibp_state(void)
|
|
+{
|
|
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
|
|
+ return "";
|
|
+
|
|
+ switch (spectre_v2_user) {
|
|
+ case SPECTRE_V2_USER_NONE:
|
|
+ return ", STIBP: disabled";
|
|
+ case SPECTRE_V2_USER_STRICT:
|
|
+ return ", STIBP: forced";
|
|
+ case SPECTRE_V2_USER_PRCTL:
|
|
+ case SPECTRE_V2_USER_SECCOMP:
|
|
+ if (static_key_enabled(&switch_to_cond_stibp))
|
|
+ return ", STIBP: conditional";
|
|
+ }
|
|
+ return "";
|
|
+}
|
|
+
|
|
+static char *ibpb_state(void)
|
|
+{
|
|
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
|
|
+ if (static_key_enabled(&switch_mm_always_ibpb))
|
|
+ return ", IBPB: always-on";
|
|
+ if (static_key_enabled(&switch_mm_cond_ibpb))
|
|
+ return ", IBPB: conditional";
|
|
+ return ", IBPB: disabled";
|
|
+ }
|
|
+ return "";
|
|
+}
|
|
+
|
|
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
|
|
char *buf, unsigned int bug)
|
|
{
|
|
@@ -828,9 +1122,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
|
|
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
|
|
|
|
case X86_BUG_SPECTRE_V2:
|
|
- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
|
|
- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
|
|
+ return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
|
|
+ ibpb_state(),
|
|
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
|
|
+ stibp_state(),
|
|
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
|
|
spectre_v2_module_string());
|
|
|
|
case X86_BUG_SPEC_STORE_BYPASS:
|
|
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
|
|
index 96643e2c75b8..51e49f6fe8e1 100644
|
|
--- a/arch/x86/kernel/cpu/common.c
|
|
+++ b/arch/x86/kernel/cpu/common.c
|
|
@@ -760,6 +760,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
|
|
set_cpu_cap(c, X86_FEATURE_STIBP);
|
|
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
|
|
}
|
|
+
|
|
+ if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
|
|
+ set_cpu_cap(c, X86_FEATURE_SSBD);
|
|
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
|
|
+ clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
|
|
+ }
|
|
}
|
|
|
|
void get_cpu_cap(struct cpuinfo_x86 *c)
|
|
@@ -958,7 +964,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
|
|
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
|
|
|
|
if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
|
|
- !(ia32_cap & ARCH_CAP_SSB_NO))
|
|
+ !(ia32_cap & ARCH_CAP_SSB_NO) &&
|
|
+ !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
|
|
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
|
|
|
|
if (x86_match_cpu(cpu_no_speculation))
|
|
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
|
|
index dbcb01006749..beec0daecbc5 100644
|
|
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
|
|
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
|
|
@@ -56,7 +56,7 @@
|
|
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
|
|
#define SMCA_THR_LVT_OFF 0xF000
|
|
|
|
-static bool thresholding_en;
|
|
+static bool thresholding_irq_en;
|
|
|
|
static const char * const th_names[] = {
|
|
"load_store",
|
|
@@ -533,9 +533,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
|
|
|
|
set_offset:
|
|
offset = setup_APIC_mce_threshold(offset, new);
|
|
-
|
|
- if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
|
|
- mce_threshold_vector = amd_threshold_interrupt;
|
|
+ if (offset == new)
|
|
+ thresholding_irq_en = true;
|
|
|
|
done:
|
|
mce_threshold_block_init(&b, offset);
|
|
@@ -1356,9 +1355,6 @@ int mce_threshold_remove_device(unsigned int cpu)
|
|
{
|
|
unsigned int bank;
|
|
|
|
- if (!thresholding_en)
|
|
- return 0;
|
|
-
|
|
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
|
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
|
|
continue;
|
|
@@ -1376,9 +1372,6 @@ int mce_threshold_create_device(unsigned int cpu)
|
|
struct threshold_bank **bp;
|
|
int err = 0;
|
|
|
|
- if (!thresholding_en)
|
|
- return 0;
|
|
-
|
|
bp = per_cpu(threshold_banks, cpu);
|
|
if (bp)
|
|
return 0;
|
|
@@ -1407,9 +1400,6 @@ static __init int threshold_init_device(void)
|
|
{
|
|
unsigned lcpu = 0;
|
|
|
|
- if (mce_threshold_vector == amd_threshold_interrupt)
|
|
- thresholding_en = true;
|
|
-
|
|
/* to hit CPUs online before the notifier is up */
|
|
for_each_online_cpu(lcpu) {
|
|
int err = mce_threshold_create_device(lcpu);
|
|
@@ -1418,6 +1408,9 @@ static __init int threshold_init_device(void)
|
|
return err;
|
|
}
|
|
|
|
+ if (thresholding_irq_en)
|
|
+ mce_threshold_vector = amd_threshold_interrupt;
|
|
+
|
|
return 0;
|
|
}
|
|
/*
|
|
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
|
|
index 61a949d84dfa..d99a8ee9e185 100644
|
|
--- a/arch/x86/kernel/fpu/signal.c
|
|
+++ b/arch/x86/kernel/fpu/signal.c
|
|
@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
|
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
|
|
}
|
|
|
|
+ local_bh_disable();
|
|
fpu->initialized = 1;
|
|
- preempt_disable();
|
|
fpu__restore(fpu);
|
|
- preempt_enable();
|
|
+ local_bh_enable();
|
|
|
|
return err;
|
|
} else {
|
|
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 988a98f34c66..a98d1cdd6299 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -41,6 +41,8 @@
#include <asm/prctl.h>
#include <asm/spec-ctrl.h>

+#include "process.h"
+
/*
 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
 * no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -255,11 +257,12 @@ void arch_setup_new_exec(void)
	enable_cpuid();
}

-static inline void switch_to_bitmap(struct tss_struct *tss,
-				    struct thread_struct *prev,
+static inline void switch_to_bitmap(struct thread_struct *prev,
				    struct thread_struct *next,
				    unsigned long tifp, unsigned long tifn)
{
+	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
	if (tifn & _TIF_IO_BITMAP) {
		/*
		 * Copy the relevant range of the IO bitmap.
@@ -398,32 +401,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
	wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
}

-static __always_inline void intel_set_ssb_state(unsigned long tifn)
+/*
+ * Update the MSRs managing speculation control, during context switch.
+ *
+ * tifp: Previous task's thread flags
+ * tifn: Next task's thread flags
+ */
+static __always_inline void __speculation_ctrl_update(unsigned long tifp,
+						      unsigned long tifn)
{
-	u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+	unsigned long tif_diff = tifp ^ tifn;
+	u64 msr = x86_spec_ctrl_base;
+	bool updmsr = false;
+
+	/*
+	 * If TIF_SSBD is different, select the proper mitigation
+	 * method. Note that if SSBD mitigation is disabled or permanentely
+	 * enabled this branch can't be taken because nothing can set
+	 * TIF_SSBD.
+	 */
+	if (tif_diff & _TIF_SSBD) {
+		if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+			amd_set_ssb_virt_state(tifn);
+		} else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+			amd_set_core_ssb_state(tifn);
+		} else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+			   static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+			msr |= ssbd_tif_to_spec_ctrl(tifn);
+			updmsr = true;
+		}
+	}
+
+	/*
+	 * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
+	 * otherwise avoid the MSR write.
+	 */
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    static_branch_unlikely(&switch_to_cond_stibp)) {
+		updmsr |= !!(tif_diff & _TIF_SPEC_IB);
+		msr |= stibp_tif_to_spec_ctrl(tifn);
+	}

-	wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+	if (updmsr)
+		wrmsrl(MSR_IA32_SPEC_CTRL, msr);
}

-static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
{
-	if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
-		amd_set_ssb_virt_state(tifn);
-	else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
-		amd_set_core_ssb_state(tifn);
-	else
-		intel_set_ssb_state(tifn);
+	if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
+		if (task_spec_ssb_disable(tsk))
+			set_tsk_thread_flag(tsk, TIF_SSBD);
+		else
+			clear_tsk_thread_flag(tsk, TIF_SSBD);
+
+		if (task_spec_ib_disable(tsk))
+			set_tsk_thread_flag(tsk, TIF_SPEC_IB);
+		else
+			clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
+	}
+	/* Return the updated threadinfo flags*/
+	return task_thread_info(tsk)->flags;
}

-void speculative_store_bypass_update(unsigned long tif)
+void speculation_ctrl_update(unsigned long tif)
{
+	/* Forced update. Make sure all relevant TIF flags are different */
	preempt_disable();
-	__speculative_store_bypass_update(tif);
+	__speculation_ctrl_update(~tif, tif);
	preempt_enable();
}

-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
-		      struct tss_struct *tss)
+/* Called from seccomp/prctl update */
+void speculation_ctrl_update_current(void)
+{
+	preempt_disable();
+	speculation_ctrl_update(speculation_ctrl_update_tif(current));
+	preempt_enable();
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev, *next;
	unsigned long tifp, tifn;
@@ -433,7 +489,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,

	tifn = READ_ONCE(task_thread_info(next_p)->flags);
	tifp = READ_ONCE(task_thread_info(prev_p)->flags);
-	switch_to_bitmap(tss, prev, next, tifp, tifn);
+	switch_to_bitmap(prev, next, tifp, tifn);

	propagate_user_return_notify(prev_p, next_p);

@@ -454,8 +510,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
	if ((tifp ^ tifn) & _TIF_NOCPUID)
		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));

-	if ((tifp ^ tifn) & _TIF_SSBD)
-		__speculative_store_bypass_update(tifn);
+	if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
+		__speculation_ctrl_update(tifp, tifn);
+	} else {
+		speculation_ctrl_update_tif(prev_p);
+		tifn = speculation_ctrl_update_tif(next_p);
+
+		/* Enforce MSR update to ensure consistent state */
+		__speculation_ctrl_update(~tifn, tifn);
+	}
}

/*
diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
new file mode 100644
index 000000000000..898e97cf6629
--- /dev/null
+++ b/arch/x86/kernel/process.h
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Code shared between 32 and 64 bit
+
+#include <asm/spec-ctrl.h>
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
+
+/*
+ * This needs to be inline to optimize for the common case where no extra
+ * work needs to be done.
+ */
+static inline void switch_to_extra(struct task_struct *prev,
+				   struct task_struct *next)
+{
+	unsigned long next_tif = task_thread_info(next)->flags;
+	unsigned long prev_tif = task_thread_info(prev)->flags;
+
+	if (IS_ENABLED(CONFIG_SMP)) {
+		/*
+		 * Avoid __switch_to_xtra() invocation when conditional
+		 * STIPB is disabled and the only different bit is
+		 * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
+		 * in the TIF_WORK_CTXSW masks.
+		 */
+		if (!static_branch_likely(&switch_to_cond_stibp)) {
+			prev_tif &= ~_TIF_SPEC_IB;
+			next_tif &= ~_TIF_SPEC_IB;
+		}
+	}
+
+	/*
+	 * __switch_to_xtra() handles debug registers, i/o bitmaps,
+	 * speculation mitigations etc.
+	 */
+	if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
+		     prev_tif & _TIF_WORK_CTXSW_PREV))
+		__switch_to_xtra(prev, next);
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5224c6099184..c2df91eab573 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,6 +59,8 @@
#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>

+#include "process.h"
+
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -234,7 +236,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

@@ -266,12 +267,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

-	/*
-	 * Now maybe handle debug registers and/or IO bitmaps
-	 */
-	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
-		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
-		__switch_to_xtra(prev_p, next_p, tss);
+	switch_to_extra(prev_p, next_p);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index cbeecfcc66d6..ec63d6be5e02 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -59,6 +59,8 @@
#include <asm/unistd_32_ia32.h>
#endif

+#include "process.h"
+
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Prints also some state that isn't saved in the pt_regs */
@@ -400,7 +402,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);

	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
		     this_cpu_read(irq_count) != -1);
@@ -467,12 +468,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	/* Reload sp0. */
	update_sp0(next_p);

-	/*
-	 * Now maybe reload the debug registers and handle I/O bitmaps
-	 */
-	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
-		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
-		__switch_to_xtra(prev_p, next_p, tss);
+	__switch_to_xtra(prev_p, next_p);

#ifdef CONFIG_XEN_PV
	/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d1f5c744142b..bbcd69c76d96 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -367,7 +367,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,

	/* cpuid 0x80000008.ebx */
	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
-		F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
+		F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
+		F(AMD_SSB_NO);

	/* cpuid 0xC0000001.edx */
	const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -649,7 +650,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
			entry->ebx |= F(VIRT_SSBD);
		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
-		if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+		/*
+		 * The preference is to use SPEC CTRL MSR instead of the
+		 * VIRT_SPEC MSR.
+		 */
+		if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+		    !boot_cpu_has(X86_FEATURE_AMD_SSBD))
			entry->ebx |= F(VIRT_SSBD);
		break;
	}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d755e0d44ac1..364d9895dd56 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4734,9 +4734,9 @@ static bool need_remote_flush(u64 old, u64 new)
}

static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
-				    const u8 *new, int *bytes)
+				    int *bytes)
{
-	u64 gentry;
+	u64 gentry = 0;
	int r;

	/*
@@ -4748,22 +4748,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
		*gpa &= ~(gpa_t)7;
		*bytes = 8;
-		r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
-		if (r)
-			gentry = 0;
-		new = (const u8 *)&gentry;
	}

-	switch (*bytes) {
-	case 4:
-		gentry = *(const u32 *)new;
-		break;
-	case 8:
-		gentry = *(const u64 *)new;
-		break;
-	default:
-		gentry = 0;
-		break;
+	if (*bytes == 4 || *bytes == 8) {
+		r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
+		if (r)
+			gentry = 0;
	}

	return gentry;
@@ -4876,8 +4866,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,

	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);

-	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
-
	/*
	 * No need to care whether allocation memory is successful
	 * or not since pte prefetch is skiped if it does not have
@@ -4886,6 +4874,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
	mmu_topup_memory_caches(vcpu);

	spin_lock(&vcpu->kvm->mmu_lock);
+
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
+
	++vcpu->kvm->stat.mmu_pte_write;
	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f6bebcec60b4..17f08db34547 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1733,21 +1733,31 @@ out:
	return ERR_PTR(err);
}

+static void svm_clear_current_vmcb(struct vmcb *vmcb)
+{
+	int i;
+
+	for_each_online_cpu(i)
+		cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+}
+
static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

+	/*
+	 * The vmcb page can be recycled, causing a false negative in
+	 * svm_vcpu_load(). So, ensure that no logical CPU has this
+	 * vmcb page recorded as its current vmcb.
+	 */
+	svm_clear_current_vmcb(svm->vmcb);
+
	__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
	__free_page(virt_to_page(svm->nested.hsave));
	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, svm);
-	/*
-	 * The vmcb page can be recycled, causing a false negative in
-	 * svm_vcpu_load(). So do a full IBPB now.
-	 */
-	indirect_branch_prediction_barrier();
}

static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -3644,7 +3654,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		break;
	case MSR_IA32_SPEC_CTRL:
		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
			return 1;

		msr_info->data = svm->spec_ctrl;
@@ -3749,11 +3760,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
		break;
	case MSR_IA32_SPEC_CTRL:
		if (!msr->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
			return 1;

		/* The STIBP bit doesn't fault even if it's not advertised */
-		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
			return 1;

		svm->spec_ctrl = data;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8d688b213504..f24329659bea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6378,6 +6378,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
	clock_pairing.nsec = ts.tv_nsec;
	clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
	clock_pairing.flags = 0;
+	memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));

	ret = 0;
	if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
@@ -6884,7 +6885,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
	else {
		if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
			kvm_x86_ops->sync_pir_to_irr(vcpu);
-		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+		if (ioapic_in_kernel(vcpu->kvm))
+			kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
	}
	bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
		  vcpu_to_synic(vcpu)->vec_bitmap, 256);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 83a3f4c935fc..5400a24e1a8c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -29,6 +29,12 @@
 *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
 */

+/*
+ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_ibpb.
+ */
+#define LAST_USER_MM_IBPB	0x1UL
+
/*
 * We get here when we do something requiring a TLB invalidation
 * but could not go invalidate all of the contexts.  We do the
@@ -180,6 +186,89 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
	}
}

+static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+{
+	unsigned long next_tif = task_thread_info(next)->flags;
+	unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+
+	return (unsigned long)next->mm | ibpb;
+}
+
+static void cond_ibpb(struct task_struct *next)
+{
+	if (!next || !next->mm)
+		return;
+
+	/*
+	 * Both, the conditional and the always IBPB mode use the mm
+	 * pointer to avoid the IBPB when switching between tasks of the
+	 * same process. Using the mm pointer instead of mm->context.ctx_id
+	 * opens a hypothetical hole vs. mm_struct reuse, which is more or
+	 * less impossible to control by an attacker. Aside of that it
+	 * would only affect the first schedule so the theoretically
+	 * exposed data is not really interesting.
+	 */
+	if (static_branch_likely(&switch_mm_cond_ibpb)) {
+		unsigned long prev_mm, next_mm;
+
+		/*
+		 * This is a bit more complex than the always mode because
+		 * it has to handle two cases:
+		 *
+		 * 1) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB not set.
+		 *
+		 * 2) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB not set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB set.
+		 *
+		 * This could be done by unconditionally issuing IBPB when
+		 * a task which has TIF_SPEC_IB set is either scheduled in
+		 * or out. Though that results in two flushes when:
+		 *
+		 * - the same user space task is scheduled out and later
+		 *   scheduled in again and only a kernel thread ran in
+		 *   between.
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in after a kernel thread ran in between
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in immediately.
+		 *
+		 * Optimize this with reasonably small overhead for the
+		 * above cases. Mangle the TIF_SPEC_IB bit into the mm
+		 * pointer of the incoming task which is stored in
+		 * cpu_tlbstate.last_user_mm_ibpb for comparison.
+		 */
+		next_mm = mm_mangle_tif_spec_ib(next);
+		prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
+
+		/*
+		 * Issue IBPB only if the mm's are different and one or
+		 * both have the IBPB bit set.
+		 */
+		if (next_mm != prev_mm &&
+		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
+			indirect_branch_prediction_barrier();
+
+		this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
+	}
+
+	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
+		/*
+		 * Only flush when switching to a user space task with a
+		 * different context than the user space task which ran
+		 * last on this CPU.
+		 */
+		if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+			indirect_branch_prediction_barrier();
+			this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
+		}
+	}
+}
+
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
@@ -248,27 +337,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
	} else {
		u16 new_asid;
		bool need_flush;
-		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);

		/*
		 * Avoid user/user BTB poisoning by flushing the branch
		 * predictor when switching between processes. This stops
		 * one process from doing Spectre-v2 attacks on another.
-		 *
-		 * As an optimization, flush indirect branches only when
-		 * switching into processes that disable dumping. This
-		 * protects high value processes like gpg, without having
-		 * too high performance overhead. IBPB is *expensive*!
-		 *
-		 * This will not flush branches when switching into kernel
-		 * threads. It will also not flush if we switch to idle
-		 * thread and back to the same process. It will flush if we
-		 * switch to a different non-dumpable process.
		 */
-		if (tsk && tsk->mm &&
-		    tsk->mm->context.ctx_id != last_ctx_id &&
-		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
-			indirect_branch_prediction_barrier();
+		cond_ibpb(tsk);

		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
@@ -318,14 +393,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
	}

-	/*
-	 * Record last user mm's context id, so we can avoid
-	 * flushing branch buffer with IBPB if we switch back
-	 * to the same user.
-	 */
-	if (next != &init_mm)
-		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
-
	/* Make sure we write CR3 before loaded_mm. */
	barrier();

@@ -406,7 +473,7 @@ void initialize_tlbstate_and_flush(void)
	write_cr3(build_cr3(mm->pgd, 0));

	/* Reinitialize tlbstate. */
-	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
+	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
	this_cpu_write(cpu_tlbstate.next_asid, 1);
	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index bcb5beb81177..7df02fc934a9 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -91,14 +91,14 @@ int main(void)
	DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
	DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XTENSA_HAVE_COPROCESSORS
-	DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp));
+	DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
+	DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
+	DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2));
+	DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3));
+	DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4));
+	DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5));
+	DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6));
+	DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7));
#endif
	DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user));
	DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t));
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index ff4f0ecb03dd..f1c46bc5d465 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -88,18 +88,21 @@ void coprocessor_release_all(struct thread_info *ti)

void coprocessor_flush_all(struct thread_info *ti)
{
-	unsigned long cpenable;
+	unsigned long cpenable, old_cpenable;
	int i;

	preempt_disable();

+	RSR_CPENABLE(old_cpenable);
	cpenable = ti->cpenable;
+	WSR_CPENABLE(cpenable);

	for (i = 0; i < XCHAL_CP_MAX; i++) {
		if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
			coprocessor_flush(ti, i);
		cpenable >>= 1;
	}
+	WSR_CPENABLE(old_cpenable);

	preempt_enable();
}
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index e2461968efb2..7c3ed7d78075 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs)
}


+#if XTENSA_HAVE_COPROCESSORS
+#define CP_OFFSETS(cp) \
+	{ \
+		.elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \
+		.ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \
+		.sz = sizeof(xtregs_ ## cp ## _t), \
+	}
+
+static const struct {
+	size_t elf_xtregs_offset;
+	size_t ti_offset;
+	size_t sz;
+} cp_offsets[] = {
+	CP_OFFSETS(cp0),
+	CP_OFFSETS(cp1),
+	CP_OFFSETS(cp2),
+	CP_OFFSETS(cp3),
+	CP_OFFSETS(cp4),
+	CP_OFFSETS(cp5),
+	CP_OFFSETS(cp6),
+	CP_OFFSETS(cp7),
+};
+#endif
+
static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
{
	struct pt_regs *regs = task_pt_regs(child);
	struct thread_info *ti = task_thread_info(child);
	elf_xtregs_t __user *xtregs = uregs;
	int ret = 0;
+	int i __maybe_unused;

	if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t)))
		return -EIO;
@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
#if XTENSA_HAVE_COPROCESSORS
	/* Flush all coprocessor registers to memory. */
	coprocessor_flush_all(ti);
-	ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp,
-			      sizeof(xtregs_coprocessor_t));
+
+	for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
+		ret |= __copy_to_user((char __user *)xtregs +
+				      cp_offsets[i].elf_xtregs_offset,
+				      (const char *)ti +
+				      cp_offsets[i].ti_offset,
+				      cp_offsets[i].sz);
#endif
	ret |= __copy_to_user(&xtregs->opt, &regs->xtregs_opt,
			      sizeof(xtregs->opt));
@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
	struct pt_regs *regs = task_pt_regs(child);
	elf_xtregs_t *xtregs = uregs;
	int ret = 0;
+	int i __maybe_unused;

	if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t)))
		return -EFAULT;
@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
	coprocessor_flush_all(ti);
	coprocessor_release_all(ti);

-	ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0,
-				sizeof(xtregs_coprocessor_t));
+	for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
+		ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset,
+					(const char __user *)xtregs +
+					cp_offsets[i].elf_xtregs_offset,
+					cp_offsets[i].sz);
#endif
	ret |= __copy_from_user(&regs->xtregs_opt, &xtregs->opt,
				sizeof(xtregs->opt));
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index a86c27948fca..96a0f940e54d 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2918,7 +2918,6 @@ static void binder_transaction(struct binder_proc *proc,
		t->buffer = NULL;
		goto err_binder_alloc_buf_failed;
	}
-	t->buffer->allow_user_free = 0;
	t->buffer->debug_id = t->debug_id;
	t->buffer->transaction = t;
	t->buffer->target_node = target_node;
@@ -3407,14 +3406,18 @@ static int binder_thread_write(struct binder_proc *proc,

			buffer = binder_alloc_prepare_to_free(&proc->alloc,
							      data_ptr);
-			if (buffer == NULL) {
-				binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n",
-					proc->pid, thread->pid, (u64)data_ptr);
-				break;
-			}
-			if (!buffer->allow_user_free) {
-				binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n",
-					proc->pid, thread->pid, (u64)data_ptr);
+			if (IS_ERR_OR_NULL(buffer)) {
+				if (PTR_ERR(buffer) == -EPERM) {
+					binder_user_error(
+						"%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n",
+						proc->pid, thread->pid,
+						(u64)data_ptr);
+				} else {
+					binder_user_error(
+						"%d:%d BC_FREE_BUFFER u%016llx no match\n",
+						proc->pid, thread->pid,
+						(u64)data_ptr);
+				}
				break;
			}
			binder_debug(BINDER_DEBUG_FREE_BUFFER,
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 58e4658f9dd6..b9281f2725a6 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -149,14 +149,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked(
		else {
			/*
			 * Guard against user threads attempting to
			 * free the buffer twice
			 * free the buffer when in use by kernel or
			 * after it's already been freed.
			 */
			if (buffer->free_in_progress) {
				pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n",
				       alloc->pid, current->pid, (u64)user_ptr);
				return NULL;
			}
			buffer->free_in_progress = 1;
			if (!buffer->allow_user_free)
				return ERR_PTR(-EPERM);
			buffer->allow_user_free = 0;
			return buffer;
		}
	}
@@ -486,7 +484,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc,

	rb_erase(best_fit, &alloc->free_buffers);
	buffer->free = 0;
-	buffer->free_in_progress = 0;
+	buffer->allow_user_free = 0;
	binder_insert_allocated_buffer_locked(alloc, buffer);
	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
		     "%d: binder_alloc_buf size %zd got %pK\n",
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 2dd33b6df104..a3ad7683b6f2 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -50,8 +50,7 @@ struct binder_buffer {
	unsigned free:1;
	unsigned allow_user_free:1;
	unsigned async_transaction:1;
-	unsigned free_in_progress:1;
-	unsigned debug_id:28;
+	unsigned debug_id:29;

	struct binder_transaction *transaction;

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index a861b5b4d443..21ed0e20c5d9 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan)
	atchan->descs_allocated = 0;
	atchan->status = 0;

+	/*
+	 * Free atslave allocated in at_dma_xlate()
+	 */
+	kfree(chan->private);
+	chan->private = NULL;
+
	dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
}

@@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);

-	atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL);
+	atslave = kzalloc(sizeof(*atslave), GFP_KERNEL);
	if (!atslave)
		return NULL;

@@ -2000,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev)
	struct resource *io;

	at_dma_off(atdma);
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&atdma->dma_common);

	dma_pool_destroy(atdma->memset_pool);
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index d96b09fea835..e05de5032f0c 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -454,6 +454,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
	}
	wait_for_completion(&msginfo->waitevent);

+	if (msginfo->response.gpadl_created.creation_status != 0) {
+		pr_err("Failed to establish GPADL: err = 0x%x\n",
+		       msginfo->response.gpadl_created.creation_status);
+
+		ret = -EDQUOT;
+		goto cleanup;
+	}
+
	if (channel->rescind) {
		ret = -ENODEV;
		goto cleanup;
diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
index 0a9e8fadfa9d..37ab30566464 100644
--- a/drivers/iio/magnetometer/st_magn_buffer.c
+++ b/drivers/iio/magnetometer/st_magn_buffer.c
@@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state)
	return st_sensors_set_dataready_irq(indio_dev, state);
}

-static int st_magn_buffer_preenable(struct iio_dev *indio_dev)
-{
-	return st_sensors_set_enable(indio_dev, true);
-}
-
static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
{
	int err;
@@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
	if (err < 0)
		goto st_magn_buffer_postenable_error;

-	return err;
+	return st_sensors_set_enable(indio_dev, true);

st_magn_buffer_postenable_error:
	kfree(mdata->buffer_data);
@@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
	int err;
	struct st_sensor_data *mdata = iio_priv(indio_dev);

-	err = iio_triggered_buffer_predisable(indio_dev);
+	err = st_sensors_set_enable(indio_dev, false);
	if (err < 0)
		goto st_magn_buffer_predisable_error;

-	err = st_sensors_set_enable(indio_dev, false);
+	err = iio_triggered_buffer_predisable(indio_dev);

st_magn_buffer_predisable_error:
	kfree(mdata->buffer_data);
@@ -75,7 +70,6 @@ st_magn_buffer_predisable_error:
}

static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
-	.preenable = &st_magn_buffer_preenable,
	.postenable = &st_magn_buffer_postenable,
	.predisable = &st_magn_buffer_predisable,
};
diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c
index 4a7db623fe29..29cdaaf1ed90 100644
--- a/drivers/media/usb/em28xx/em28xx-dvb.c
+++ b/drivers/media/usb/em28xx/em28xx-dvb.c
@@ -2105,6 +2105,8 @@ static int em28xx_dvb_fini(struct em28xx *dev)
		}
	}

+	em28xx_unregister_dvb(dvb);
+
	/* remove I2C SEC */
	client = dvb->i2c_client_sec;
	if (client) {
@@ -2126,7 +2128,6 @@ static int em28xx_dvb_fini(struct em28xx *dev)
		i2c_unregister_device(client);
	}

-	em28xx_unregister_dvb(dvb);
	kfree(dvb);
	dev->dvb = NULL;
	kref_put(&dev->ref, em28xx_free_device);
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 329727e00e97..95745dc4e0ec 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -417,7 +417,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev,
		if (err)
			goto error_window;
		err = scif_map_page(&window->num_pages_lookup.lookup[j],
-				    vmalloc_dma_phys ?
+				    vmalloc_num_pages ?
				    vmalloc_to_page(&window->num_pages[i]) :
				    virt_to_page(&window->num_pages[i]),
				    remote_dev);
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 94d7a865b135..7504f430c011 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -578,6 +578,16 @@ static int init_volumes(struct ubi_device *ubi,
		vol->ubi = ubi;
		reserved_pebs += vol->reserved_pebs;

+		/*
+		 * We use ubi->peb_count and not vol->reserved_pebs because
+		 * we want to keep the code simple. Otherwise we'd have to
+		 * resize/check the bitmap upon volume resize too.
+		 * Allocating a few bytes more does not hurt.
+		 */
+		err = ubi_fastmap_init_checkmap(vol, ubi->peb_count);
+		if (err)
+			return err;
+
		/*
		 * In case of dynamic volume UBI knows nothing about how many
		 * data is stored there. So assume the whole volume is used.
@@ -620,16 +630,6 @@ static int init_volumes(struct ubi_device *ubi,
			(long long)(vol->used_ebs - 1) * vol->usable_leb_size;
		vol->used_bytes += av->last_data_size;
		vol->last_eb_bytes = av->last_data_size;
-
-		/*
-		 * We use ubi->peb_count and not vol->reserved_pebs because
-		 * we want to keep the code simple. Otherwise we'd have to
-		 * resize/check the bitmap upon volume resize too.
-		 * Allocating a few bytes more does not hurt.
-		 */
-		err = ubi_fastmap_init_checkmap(vol, ubi->peb_count);
-		if (err)
-			return err;
	}

	/* And add the layout volume */
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 2237ef8e4344..f13256af8031 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1691,6 +1691,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
	bool if_up = netif_running(nic->netdev);
	struct bpf_prog *old_prog;
	bool bpf_attached = false;
+	int ret = 0;

	/* For now just support only the usual MTU sized frames */
	if (prog && (dev->mtu > 1500)) {
@@ -1724,8 +1725,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
	if (nic->xdp_prog) {
		/* Attach BPF program */
		nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
-		if (!IS_ERR(nic->xdp_prog))
+		if (!IS_ERR(nic->xdp_prog)) {
			bpf_attached = true;
+		} else {
+			ret = PTR_ERR(nic->xdp_prog);
+			nic->xdp_prog = NULL;
+		}
	}

	/* Calculate Tx queues needed for XDP and network stack */
@@ -1737,7 +1742,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
		netif_trans_update(nic->netdev);
	}

-	return 0;
+	return ret;
}

static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index a3d12dbde95b..09494e1c77c5 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
	if (!sq->dmem.base)
		return;

-	if (sq->tso_hdrs)
+	if (sq->tso_hdrs) {
		dma_free_coherent(&nic->pdev->dev,
				  sq->dmem.q_len * TSO_HEADER_SIZE,
				  sq->tso_hdrs, sq->tso_hdrs_phys);
+		sq->tso_hdrs = NULL;
+	}

	/* Free pending skbs in the queue */
	smp_rmb();
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index e9f101c9bae2..bfbb39f93554 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
			 * it just report sending a packet to the target
			 * (without actual packet transfer).
			 */
-			dev_kfree_skb_any(skb);
			ndev->stats.tx_packets++;
			ndev->stats.tx_bytes += skb->len;
+			dev_kfree_skb_any(skb);
		}
	}

diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index d49c7103085e..aabbcfb6e6da 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -140,7 +140,6 @@ struct ipheth_device {
	struct usb_device *udev;
	struct usb_interface *intf;
	struct net_device *net;
-	struct sk_buff *tx_skb;
	struct urb *tx_urb;
	struct urb *rx_urb;
	unsigned char *tx_buf;
@@ -229,6 +228,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb)
	case -ENOENT:
	case -ECONNRESET:
	case -ESHUTDOWN:
+	case -EPROTO:
		return;
	case 0:
		break;
@@ -280,7 +280,6 @@ static void ipheth_sndbulk_callback(struct urb *urb)
		dev_err(&dev->intf->dev, "%s: urb status: %d\n",
			__func__, status);

-	dev_kfree_skb_irq(dev->tx_skb);
	netif_wake_queue(dev->net);
}

@@ -410,7 +409,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
	if (skb->len > IPHETH_BUF_SIZE) {
		WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len);
		dev->net->stats.tx_dropped++;
-		dev_kfree_skb_irq(skb);
+		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

@@ -430,12 +429,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
		dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n",
			__func__, retval);
		dev->net->stats.tx_errors++;
-		dev_kfree_skb_irq(skb);
+		dev_kfree_skb_any(skb);
	} else {
-		dev->tx_skb = skb;
-
		dev->net->stats.tx_packets++;
		dev->net->stats.tx_bytes += skb->len;
+		dev_consume_skb_any(skb);
		netif_stop_queue(net);
	}

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f528e9ac3413..0e8e3be50332 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -61,7 +61,8 @@ static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
-	VIRTIO_NET_F_GUEST_UFO
+	VIRTIO_NET_F_GUEST_UFO,
+	VIRTIO_NET_F_GUEST_CSUM
};

struct virtnet_stats {
@@ -1939,9 +1940,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
	if (!vi->guest_offloads)
		return 0;

-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
-		offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
-
	return virtnet_set_guest_offloads(vi, offloads);
}

@@ -1951,8 +1949,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)

	if (!vi->guest_offloads)
		return 0;
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
-		offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;

	return virtnet_set_guest_offloads(vi, offloads);
}
@@ -1970,8 +1966,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
	    && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
-	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
-		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
+	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
+	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
+		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
		return -EOPNOTSUPP;
	}

diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index ffdd2fa401b1..d63d7c326801 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -1380,8 +1380,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie)
	};
	int rc;
	u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len;
-	struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL);
+	struct wmi_set_appie_cmd *cmd;

+	if (len < ie_len) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	cmd = kzalloc(len, GFP_KERNEL);
	if (!cmd) {
		rc = -ENOMEM;
		goto out;
diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c
index f48c3f62966d..761cf8573a80 100644
--- a/drivers/net/wireless/ti/wlcore/cmd.c
+++ b/drivers/net/wireless/ti/wlcore/cmd.c
@@ -35,7 +35,6 @@
#include "wl12xx_80211.h"
#include "cmd.h"
#include "event.h"
-#include "ps.h"
#include "tx.h"
#include "hw_ops.h"

@@ -192,10 +191,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,

	timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT);

-	ret = wl1271_ps_elp_wakeup(wl);
-	if (ret < 0)
-		return ret;
-
	do {
		if (time_after(jiffies, timeout_time)) {
			wl1271_debug(DEBUG_CMD, "timeout waiting for event %d",
@@ -227,7 +222,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
	} while (!event);

out:
-	wl1271_ps_elp_sleep(wl);
	kfree(events_vector);
	return ret;
}
diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c
index 87fa486bee2c..1ede4b60aac3 100644
--- a/drivers/pci/dwc/pci-layerscape.c
+++ b/drivers/pci/dwc/pci-layerscape.c
@@ -89,7 +89,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie)
	int i;

	for (i = 0; i < PCIE_IATU_NUM; i++)
-		dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i);
+		dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND);
}

static int ls1021_pcie_link_up(struct dw_pcie *pci)
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 169dd7127f9e..69ef5f4060ed 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4545,8 +4545,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
{
	struct qeth_ipa_cmd *cmd;
	struct qeth_arp_query_info *qinfo;
-	struct qeth_snmp_cmd *snmp;
	unsigned char *data;
+	void *snmp_data;
	__u16 data_len;

	QETH_CARD_TEXT(card, 3, "snpcmdcb");
@@ -4554,7 +4554,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
	cmd = (struct qeth_ipa_cmd *) sdata;
	data = (unsigned char *)((char *)cmd - reply->offset);
	qinfo = (struct qeth_arp_query_info *) reply->param;
-	snmp = &cmd->data.setadapterparms.data.snmp;

	if (cmd->hdr.return_code) {
		QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code);
@@ -4567,10 +4566,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
		return 0;
	}
	data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data));
-	if (cmd->data.setadapterparms.hdr.seq_no == 1)
-		data_len -= (__u16)((char *)&snmp->data - (char *)cmd);
-	else
-		data_len -= (__u16)((char *)&snmp->request - (char *)cmd);
+	if (cmd->data.setadapterparms.hdr.seq_no == 1) {
+		snmp_data = &cmd->data.setadapterparms.data.snmp;
+		data_len -= offsetof(struct qeth_ipa_cmd,
+				     data.setadapterparms.data.snmp);
+	} else {
+		snmp_data = &cmd->data.setadapterparms.data.snmp.request;
+		data_len -= offsetof(struct qeth_ipa_cmd,
+				     data.setadapterparms.data.snmp.request);
+	}

	/* check if there is enough room in userspace */
	if ((qinfo->udata_len - qinfo->udata_offset) < data_len) {
@@ -4583,16 +4587,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
	QETH_CARD_TEXT_(card, 4, "sseqn%i",
		cmd->data.setadapterparms.hdr.seq_no);
	/*copy entries to user buffer*/
-	if (cmd->data.setadapterparms.hdr.seq_no == 1) {
-		memcpy(qinfo->udata + qinfo->udata_offset,
-		       (char *)snmp,
-		       data_len + offsetof(struct qeth_snmp_cmd, data));
-		qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data);
-	} else {
-		memcpy(qinfo->udata + qinfo->udata_offset,
-		       (char *)&snmp->request, data_len);
-	}
+	memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
	qinfo->udata_offset += data_len;
+
	/* check if all replies received ... */
	QETH_CARD_TEXT_(card, 4, "srtot%i",
		cmd->data.setadapterparms.hdr.used_total);
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index bd4352fe2de3..83852f323c5e 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -1293,7 +1293,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,

		sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
		sinfo->tx_packets = psta->sta_stats.tx_pkts;
-
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
	}

	/* for Ad-Hoc/AP mode */
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 314ffac50bb8..f05e9af4fe81 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -1461,6 +1461,7 @@ vchiq_compat_ioctl_await_completion(struct file *file,
	struct vchiq_await_completion32 args32;
	struct vchiq_completion_data32 completion32;
	unsigned int *msgbufcount32;
+	unsigned int msgbufcount_native;
	compat_uptr_t msgbuf32;
	void *msgbuf;
	void **msgbufptr;
@@ -1572,7 +1573,11 @@ vchiq_compat_ioctl_await_completion(struct file *file,
			 sizeof(completion32)))
		return -EFAULT;

-	args32.msgbufcount--;
+	if (get_user(msgbufcount_native, &args->msgbufcount))
+		return -EFAULT;
+
+	if (!msgbufcount_native)
+		args32.msgbufcount--;

	msgbufcount32 =
		&((struct vchiq_await_completion32 __user *)arg)->msgbufcount;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 1e8f68960014..808437c5ec49 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -64,6 +64,9 @@ static const struct usb_device_id usb_quirk_list[] = {
	/* Microsoft LifeCam-VX700 v2.0 */
	{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },

+	/* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */
+	{ USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
+
	/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
	{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
	{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index ac8d619ff887..b8704c0678f9 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1511,9 +1511,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
	unsigned transfer_in_flight;
	unsigned started;

-	if (dep->flags & DWC3_EP_STALL)
-		return 0;
-
	if (dep->number > 1)
		trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
	else
@@ -1535,8 +1532,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
		else
			dep->flags |= DWC3_EP_STALL;
	} else {
-		if (!(dep->flags & DWC3_EP_STALL))
-			return 0;

		ret = dwc3_send_clear_stall_ep_cmd(dep);
		if (ret)
diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h
index 8fe624ad302a..7ca779493671 100644
--- a/drivers/usb/storage/unusual_realtek.h
+++ b/drivers/usb/storage/unusual_realtek.h
@@ -39,4 +39,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999,
		"USB Card Reader",
		USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),

+UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999,
+		"Realtek",
+		"USB Card Reader",
+		USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
+
+UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999,
+		"Realtek",
+		"USB Card Reader",
+		USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
+
#endif  /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f2cd9dedb037..195229df5ba0 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
	   export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
-	   uuid-tree.o props.o hash.o free-space-tree.o
+	   uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o

btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0e67cee73c53..e42673477c25 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,6 +50,7 @@
#include "sysfs.h"
#include "qgroup.h"
#include "compression.h"
+#include "tree-checker.h"

#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -544,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
	return ret;
}

-#define CORRUPT(reason, eb, root, slot)					\
-	btrfs_crit(root->fs_info,					\
-		   "corrupt %s, %s: block=%llu, root=%llu, slot=%d",	\
-		   btrfs_header_level(eb) == 0 ? "leaf" : "node",	\
-		   reason, btrfs_header_bytenr(eb), root->objectid, slot)
-
-static noinline int check_leaf(struct btrfs_root *root,
-			       struct extent_buffer *leaf)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_key key;
-	struct btrfs_key leaf_key;
-	u32 nritems = btrfs_header_nritems(leaf);
-	int slot;
-
-	/*
-	 * Extent buffers from a relocation tree have a owner field that
-	 * corresponds to the subvolume tree they are based on. So just from an
-	 * extent buffer alone we can not find out what is the id of the
-	 * corresponding subvolume tree, so we can not figure out if the extent
-	 * buffer corresponds to the root of the relocation tree or not. So skip
-	 * this check for relocation trees.
-	 */
-	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
-		struct btrfs_root *check_root;
-
-		key.objectid = btrfs_header_owner(leaf);
-		key.type = BTRFS_ROOT_ITEM_KEY;
-		key.offset = (u64)-1;
-
-		check_root = btrfs_get_fs_root(fs_info, &key, false);
-		/*
-		 * The only reason we also check NULL here is that during
-		 * open_ctree() some roots has not yet been set up.
-		 */
-		if (!IS_ERR_OR_NULL(check_root)) {
-			struct extent_buffer *eb;
-
-			eb = btrfs_root_node(check_root);
-			/* if leaf is the root, then it's fine */
-			if (leaf != eb) {
-				CORRUPT("non-root leaf's nritems is 0",
-					leaf, check_root, 0);
-				free_extent_buffer(eb);
-				return -EIO;
-			}
-			free_extent_buffer(eb);
-		}
-		return 0;
-	}
-
-	if (nritems == 0)
-		return 0;
-
-	/* Check the 0 item */
-	if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
-	    BTRFS_LEAF_DATA_SIZE(fs_info)) {
-		CORRUPT("invalid item offset size pair", leaf, root, 0);
-		return -EIO;
-	}
-
-	/*
-	 * Check to make sure each items keys are in the correct order and their
-	 * offsets make sense.  We only have to loop through nritems-1 because
-	 * we check the current slot against the next slot, which verifies the
-	 * next slot's offset+size makes sense and that the current's slot
-	 * offset is correct.
-	 */
-	for (slot = 0; slot < nritems - 1; slot++) {
-		btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &key, slot + 1);
-
-		/* Make sure the keys are in the right order */
-		if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
-			CORRUPT("bad key order", leaf, root, slot);
-			return -EIO;
-		}
-
-		/*
-		 * Make sure the offset and ends are right, remember that the
-		 * item data starts at the end of the leaf and grows towards the
-		 * front.
-		 */
-		if (btrfs_item_offset_nr(leaf, slot) !=
-			btrfs_item_end_nr(leaf, slot + 1)) {
-			CORRUPT("slot offset bad", leaf, root, slot);
-			return -EIO;
-		}
-
-		/*
-		 * Check to make sure that we don't point outside of the leaf,
-		 * just in case all the items are consistent to each other, but
-		 * all point outside of the leaf.
-		 */
-		if (btrfs_item_end_nr(leaf, slot) >
-		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
-			CORRUPT("slot end outside of leaf", leaf, root, slot);
-			return -EIO;
-		}
-	}
-
-	return 0;
-}
-
-static int check_node(struct btrfs_root *root, struct extent_buffer *node)
-{
-	unsigned long nr = btrfs_header_nritems(node);
-	struct btrfs_key key, next_key;
-	int slot;
-	u64 bytenr;
-	int ret = 0;
-
-	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
-		btrfs_crit(root->fs_info,
-			   "corrupt node: block %llu root %llu nritems %lu",
-			   node->start, root->objectid, nr);
-		return -EIO;
-	}
-
-	for (slot = 0; slot < nr - 1; slot++) {
-		bytenr = btrfs_node_blockptr(node, slot);
-		btrfs_node_key_to_cpu(node, &key, slot);
-		btrfs_node_key_to_cpu(node, &next_key, slot + 1);
-
-		if (!bytenr) {
-			CORRUPT("invalid item slot", node, root, slot);
-			ret = -EIO;
-			goto out;
-		}
-
-		if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
-			CORRUPT("bad key order", node, root, slot);
-			ret = -EIO;
-			goto out;
-		}
-	}
-out:
-	return ret;
-}
-
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
				      u64 phy_offset, struct page *page,
				      u64 start, u64 end, int mirror)
@@ -749,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
	 * that we don't try and read the other copies of this block, just
	 * return -EIO.
	 */
-	if (found_level == 0 && check_leaf(root, eb)) {
+	if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
		set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
		ret = -EIO;
	}

-	if (found_level > 0 && check_node(root, eb))
+	if (found_level > 0 && btrfs_check_node(root, eb))
		ret = -EIO;

	if (!ret)
@@ -4009,7 +3870,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
					 buf->len,
					 fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-	if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
+	/*
+	 * Since btrfs_mark_buffer_dirty() can be called with item pointer set
+	 * but item data not updated.
+	 * So here we should only check item pointers, not item data.
+	 */
+	if (btrfs_header_level(buf) == 0 &&
+	    btrfs_check_leaf_relaxed(root, buf)) {
		btrfs_print_leaf(buf);
		ASSERT(0);
	}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2cb3569ac548..83791d13c204 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9828,6 +9828,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
	int ret = 0;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
+	struct btrfs_block_group_item bg;
+	u64 flags;
	int slot;

	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9862,8 +9864,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
				"logical %llu len %llu found bg but no related chunk",
					  found_key.objectid, found_key.offset);
				ret = -ENOENT;
+			} else if (em->start != found_key.objectid ||
+				   em->len != found_key.offset) {
+				btrfs_err(fs_info,
+		"block group %llu len %llu mismatch with chunk %llu len %llu",
+					  found_key.objectid, found_key.offset,
+					  em->start, em->len);
+				ret = -EUCLEAN;
			} else {
-				ret = 0;
+				read_extent_buffer(leaf, &bg,
+					btrfs_item_ptr_offset(leaf, slot),
+					sizeof(bg));
+				flags = btrfs_block_group_flags(&bg) &
+					BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+				if (flags != (em->map_lookup->type &
+					      BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+					btrfs_err(fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+						found_key.objectid,
+						found_key.offset, flags,
+						(BTRFS_BLOCK_GROUP_TYPE_MASK &
+						 em->map_lookup->type));
+					ret = -EUCLEAN;
+				} else {
+					ret = 0;
+				}
			}
			free_extent_map(em);
			goto out;
@@ -10092,6 +10118,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
	return cache;
}

+
+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct extent_map *em;
+	struct btrfs_block_group_cache *bg;
+	u64 start = 0;
+	int ret = 0;
+
+	while (1) {
+		read_lock(&map_tree->map_tree.lock);
+		/*
+		 * lookup_extent_mapping will return the first extent map
+		 * intersecting the range, so setting @len to 1 is enough to
+		 * get the first chunk.
+		 */
+		em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+		read_unlock(&map_tree->map_tree.lock);
+		if (!em)
+			break;
+
+		bg = btrfs_lookup_block_group(fs_info, em->start);
+		if (!bg) {
+			btrfs_err(fs_info,
+	"chunk start=%llu len=%llu doesn't have corresponding block group",
+				     em->start, em->len);
+			ret = -EUCLEAN;
+			free_extent_map(em);
+			break;
+		}
+		if (bg->key.objectid != em->start ||
+		    bg->key.offset != em->len ||
+		    (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+		    (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+			btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+				em->start, em->len,
+				em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+				bg->key.objectid, bg->key.offset,
+				bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+			ret = -EUCLEAN;
+			free_extent_map(em);
+			btrfs_put_block_group(bg);
+			break;
+		}
+		start = em->start + em->len;
+		free_extent_map(em);
+		btrfs_put_block_group(bg);
+	}
+	return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
	struct btrfs_path *path;
@@ -10264,7 +10346,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
	}

	init_global_block_rsv(info);
-	ret = 0;
+	ret = check_chunk_block_group_mappings(info);
error:
	btrfs_free_path(path);
	return ret;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
|
|
index eeae2c3ab17e..5feb8b03ffe8 100644
|
|
--- a/fs/btrfs/relocation.c
|
|
+++ b/fs/btrfs/relocation.c
|
|
@@ -4048,6 +4048,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
|
|
restart:
|
|
if (update_backref_cache(trans, &rc->backref_cache)) {
|
|
btrfs_end_transaction(trans);
|
|
+ trans = NULL;
|
|
continue;
|
|
}
|
|
|
|
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
|
|
index fe960d5e8913..49a02bf091ae 100644
|
|
--- a/fs/btrfs/super.c
|
|
+++ b/fs/btrfs/super.c
|
|
@@ -2176,6 +2176,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
|
vol = memdup_user((void __user *)arg, sizeof(*vol));
|
|
if (IS_ERR(vol))
|
|
return PTR_ERR(vol);
|
|
+ vol->name[BTRFS_PATH_NAME_MAX] = '\0';
|
|
|
|
switch (cmd) {
|
|
case BTRFS_IOC_SCAN_DEV:
|
|
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
|
|
index f74005ca8f08..73c1fbca0c35 100644
|
|
--- a/fs/btrfs/transaction.c
|
|
+++ b/fs/btrfs/transaction.c
|
|
@@ -1955,6 +1955,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
|
return ret;
|
|
}
|
|
|
|
+ btrfs_trans_release_metadata(trans, fs_info);
|
|
+ trans->block_rsv = NULL;
|
|
+
|
|
/* make a pass through all the delayed refs we have so far
|
|
* any runnings procs may add more while we are here
|
|
*/
|
|
@@ -1964,9 +1967,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
|
return ret;
|
|
}
|
|
|
|
- btrfs_trans_release_metadata(trans, fs_info);
|
|
- trans->block_rsv = NULL;
|
|
-
|
|
cur_trans = trans->transaction;
|
|
|
|
/*
|
|
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
|
|
new file mode 100644
|
|
index 000000000000..f206aec1525d
|
|
--- /dev/null
|
|
+++ b/fs/btrfs/tree-checker.c
|
|
@@ -0,0 +1,649 @@
|
|
+/*
|
|
+ * Copyright (C) Qu Wenruo 2017. All rights reserved.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public
|
|
+ * License v2 as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public
|
|
+ * License along with this program.
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * The module is used to catch unexpected/corrupted tree block data.
|
|
+ * Such behavior can be caused either by a fuzzed image or bugs.
|
|
+ *
|
|
+ * The objective is to do leaf/node validation checks when tree block is read
|
|
+ * from disk, and check *every* possible member, so other code won't
|
|
+ * need to checking them again.
|
|
+ *
|
|
+ * Due to the potential and unwanted damage, every checker needs to be
|
|
+ * carefully reviewed otherwise so it does not prevent mount of valid images.
|
|
+ */
|
|
+
|
|
+#include "ctree.h"
|
|
+#include "tree-checker.h"
|
|
+#include "disk-io.h"
|
|
+#include "compression.h"
|
|
+#include "hash.h"
|
|
+#include "volumes.h"
|
|
+
|
|
+#define CORRUPT(reason, eb, root, slot) \
|
|
+ btrfs_crit(root->fs_info, \
|
|
+ "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
|
|
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", \
|
|
+ reason, btrfs_header_bytenr(eb), root->objectid, slot)
|
|
+
|
|
+/*
|
|
+ * Error message should follow the following format:
|
|
+ * corrupt <type>: <identifier>, <reason>[, <bad_value>]
|
|
+ *
|
|
+ * @type: leaf or node
|
|
+ * @identifier: the necessary info to locate the leaf/node.
|
|
+ * It's recommened to decode key.objecitd/offset if it's
|
|
+ * meaningful.
|
|
+ * @reason: describe the error
|
|
+ * @bad_value: optional, it's recommened to output bad value and its
|
|
+ * expected value (range).
|
|
+ *
|
|
+ * Since comma is used to separate the components, only space is allowed
|
|
+ * inside each component.
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
|
|
+ * Allows callers to customize the output.
|
|
+ */
|
|
+__printf(4, 5)
|
|
+static void generic_err(const struct btrfs_root *root,
|
|
+ const struct extent_buffer *eb, int slot,
|
|
+ const char *fmt, ...)
|
|
+{
|
|
+ struct va_format vaf;
|
|
+ va_list args;
|
|
+
|
|
+ va_start(args, fmt);
|
|
+
|
|
+ vaf.fmt = fmt;
|
|
+ vaf.va = &args;
|
|
+
|
|
+ btrfs_crit(root->fs_info,
|
|
+ "corrupt %s: root=%llu block=%llu slot=%d, %pV",
|
|
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
|
|
+ root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
|
|
+ va_end(args);
|
|
+}
|
|
+
|
|
+static int check_extent_data_item(struct btrfs_root *root,
|
|
+ struct extent_buffer *leaf,
|
|
+ struct btrfs_key *key, int slot)
|
|
+{
|
|
+ struct btrfs_file_extent_item *fi;
|
|
+ u32 sectorsize = root->fs_info->sectorsize;
|
|
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
|
|
+
|
|
+ if (!IS_ALIGNED(key->offset, sectorsize)) {
|
|
+ CORRUPT("unaligned key offset for file extent",
|
|
+ leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
|
+
|
|
+ if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
|
|
+ CORRUPT("invalid file extent type", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Support for new compression/encrption must introduce incompat flag,
|
|
+ * and must be caught in open_ctree().
|
|
+ */
|
|
+ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
|
|
+ CORRUPT("invalid file extent compression", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (btrfs_file_extent_encryption(leaf, fi)) {
|
|
+ CORRUPT("invalid file extent encryption", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
|
|
+ /* Inline extent must have 0 as key offset */
|
|
+ if (key->offset) {
|
|
+ CORRUPT("inline extent has non-zero key offset",
|
|
+ leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ /* Compressed inline extent has no on-disk size, skip it */
|
|
+ if (btrfs_file_extent_compression(leaf, fi) !=
|
|
+ BTRFS_COMPRESS_NONE)
|
|
+ return 0;
|
|
+
|
|
+ /* Uncompressed inline extent size must match item size */
|
|
+ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
|
|
+ btrfs_file_extent_ram_bytes(leaf, fi)) {
|
|
+ CORRUPT("plaintext inline extent has invalid size",
|
|
+ leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ /* Regular or preallocated extent has fixed item size */
|
|
+ if (item_size != sizeof(*fi)) {
|
|
+ CORRUPT(
|
|
+ "regluar or preallocated extent data item size is invalid",
|
|
+ leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
|
|
+ !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
|
|
+ !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
|
|
+ !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
|
|
+ !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
|
|
+ CORRUPT(
|
|
+ "regular or preallocated extent data item has unaligned value",
|
|
+ leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
|
|
+ struct btrfs_key *key, int slot)
|
|
+{
|
|
+ u32 sectorsize = root->fs_info->sectorsize;
|
|
+ u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
|
|
+
|
|
+ if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
|
|
+ CORRUPT("invalid objectid for csum item", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (!IS_ALIGNED(key->offset, sectorsize)) {
|
|
+ CORRUPT("unaligned key offset for csum item", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
|
|
+ CORRUPT("unaligned csum item size", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Customized reported for dir_item, only important new info is key->objectid,
|
|
+ * which represents inode number
|
|
+ */
|
|
+__printf(4, 5)
|
|
+static void dir_item_err(const struct btrfs_root *root,
|
|
+ const struct extent_buffer *eb, int slot,
|
|
+ const char *fmt, ...)
|
|
+{
|
|
+ struct btrfs_key key;
|
|
+ struct va_format vaf;
|
|
+ va_list args;
|
|
+
|
|
+ btrfs_item_key_to_cpu(eb, &key, slot);
|
|
+ va_start(args, fmt);
|
|
+
|
|
+ vaf.fmt = fmt;
|
|
+ vaf.va = &args;
|
|
+
|
|
+ btrfs_crit(root->fs_info,
|
|
+ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
|
|
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
|
|
+ btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
|
|
+ va_end(args);
|
|
+}
|
|
+
|
|
+static int check_dir_item(struct btrfs_root *root,
|
|
+ struct extent_buffer *leaf,
|
|
+ struct btrfs_key *key, int slot)
|
|
+{
|
|
+ struct btrfs_dir_item *di;
|
|
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
|
|
+ u32 cur = 0;
|
|
+
|
|
+ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
|
|
+ while (cur < item_size) {
|
|
+ u32 name_len;
|
|
+ u32 data_len;
|
|
+ u32 max_name_len;
|
|
+ u32 total_size;
|
|
+ u32 name_hash;
|
|
+ u8 dir_type;
|
|
+
|
|
+ /* header itself should not cross item boundary */
|
|
+ if (cur + sizeof(*di) > item_size) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "dir item header crosses item boundary, have %zu boundary %u",
|
|
+ cur + sizeof(*di), item_size);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ /* dir type check */
|
|
+ dir_type = btrfs_dir_type(leaf, di);
|
|
+ if (dir_type >= BTRFS_FT_MAX) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "invalid dir item type, have %u expect [0, %u)",
|
|
+ dir_type, BTRFS_FT_MAX);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ if (key->type == BTRFS_XATTR_ITEM_KEY &&
|
|
+ dir_type != BTRFS_FT_XATTR) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "invalid dir item type for XATTR key, have %u expect %u",
|
|
+ dir_type, BTRFS_FT_XATTR);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (dir_type == BTRFS_FT_XATTR &&
|
|
+ key->type != BTRFS_XATTR_ITEM_KEY) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "xattr dir type found for non-XATTR key");
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (dir_type == BTRFS_FT_XATTR)
|
|
+ max_name_len = XATTR_NAME_MAX;
|
|
+ else
|
|
+ max_name_len = BTRFS_NAME_LEN;
|
|
+
|
|
+ /* Name/data length check */
|
|
+ name_len = btrfs_dir_name_len(leaf, di);
|
|
+ data_len = btrfs_dir_data_len(leaf, di);
|
|
+ if (name_len > max_name_len) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "dir item name len too long, have %u max %u",
|
|
+ name_len, max_name_len);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "dir item name and data len too long, have %u max %u",
|
|
+ name_len + data_len,
|
|
+ BTRFS_MAX_XATTR_SIZE(root->fs_info));
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ if (data_len && dir_type != BTRFS_FT_XATTR) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "dir item with invalid data len, have %u expect 0",
|
|
+ data_len);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ total_size = sizeof(*di) + name_len + data_len;
|
|
+
|
|
+ /* header and name/data should not cross item boundary */
|
|
+ if (cur + total_size > item_size) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "dir item data crosses item boundary, have %u boundary %u",
|
|
+ cur + total_size, item_size);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Special check for XATTR/DIR_ITEM, as key->offset is name
|
|
+ * hash, should match its name
|
|
+ */
|
|
+ if (key->type == BTRFS_DIR_ITEM_KEY ||
|
|
+ key->type == BTRFS_XATTR_ITEM_KEY) {
|
|
+ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
|
|
+
|
|
+ read_extent_buffer(leaf, namebuf,
|
|
+ (unsigned long)(di + 1), name_len);
|
|
+ name_hash = btrfs_name_hash(namebuf, name_len);
|
|
+ if (key->offset != name_hash) {
|
|
+ dir_item_err(root, leaf, slot,
|
|
+ "name hash mismatch with key, have 0x%016x expect 0x%016llx",
|
|
+ name_hash, key->offset);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ }
|
|
+ cur += total_size;
|
|
+ di = (struct btrfs_dir_item *)((void *)di + total_size);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+__printf(4, 5)
|
|
+__cold
|
|
+static void block_group_err(const struct btrfs_fs_info *fs_info,
|
|
+ const struct extent_buffer *eb, int slot,
|
|
+ const char *fmt, ...)
|
|
+{
|
|
+ struct btrfs_key key;
|
|
+ struct va_format vaf;
|
|
+ va_list args;
|
|
+
|
|
+ btrfs_item_key_to_cpu(eb, &key, slot);
|
|
+ va_start(args, fmt);
|
|
+
|
|
+ vaf.fmt = fmt;
|
|
+ vaf.va = &args;
|
|
+
|
|
+ btrfs_crit(fs_info,
|
|
+ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
|
|
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
|
|
+ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
|
|
+ key.objectid, key.offset, &vaf);
|
|
+ va_end(args);
|
|
+}
|
|
+
|
|
+static int check_block_group_item(struct btrfs_fs_info *fs_info,
|
|
+ struct extent_buffer *leaf,
|
|
+ struct btrfs_key *key, int slot)
|
|
+{
|
|
+ struct btrfs_block_group_item bgi;
|
|
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
|
|
+ u64 flags;
|
|
+ u64 type;
|
|
+
|
|
+ /*
|
|
+ * Here we don't really care about alignment since extent allocator can
|
|
+ * handle it. We care more about the size, as if one block group is
|
|
+ * larger than maximum size, it's must be some obvious corruption.
|
|
+ */
|
|
+ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
|
|
+ block_group_err(fs_info, leaf, slot,
|
|
+ "invalid block group size, have %llu expect (0, %llu]",
|
|
+ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ if (item_size != sizeof(bgi)) {
|
|
+ block_group_err(fs_info, leaf, slot,
|
|
+ "invalid item size, have %u expect %zu",
|
|
+ item_size, sizeof(bgi));
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
|
|
+ sizeof(bgi));
|
|
+ if (btrfs_block_group_chunk_objectid(&bgi) !=
|
|
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
|
|
+ block_group_err(fs_info, leaf, slot,
|
|
+ "invalid block group chunk objectid, have %llu expect %llu",
|
|
+ btrfs_block_group_chunk_objectid(&bgi),
|
|
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ if (btrfs_block_group_used(&bgi) > key->offset) {
|
|
+ block_group_err(fs_info, leaf, slot,
|
|
+ "invalid block group used, have %llu expect [0, %llu)",
|
|
+ btrfs_block_group_used(&bgi), key->offset);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ flags = btrfs_block_group_flags(&bgi);
|
|
+ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
|
|
+ block_group_err(fs_info, leaf, slot,
|
|
+"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
|
|
+ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
|
|
+ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
|
|
+ if (type != BTRFS_BLOCK_GROUP_DATA &&
|
|
+ type != BTRFS_BLOCK_GROUP_METADATA &&
|
|
+ type != BTRFS_BLOCK_GROUP_SYSTEM &&
|
|
+ type != (BTRFS_BLOCK_GROUP_METADATA |
|
|
+ BTRFS_BLOCK_GROUP_DATA)) {
|
|
+ block_group_err(fs_info, leaf, slot,
|
|
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
|
|
+ type, hweight64(type),
|
|
+ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
|
|
+ BTRFS_BLOCK_GROUP_SYSTEM,
|
|
+ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Common point to switch the item-specific validation.
|
|
+ */
|
|
+static int check_leaf_item(struct btrfs_root *root,
|
|
+ struct extent_buffer *leaf,
|
|
+ struct btrfs_key *key, int slot)
|
|
+{
|
|
+ int ret = 0;
|
|
+
|
|
+ switch (key->type) {
|
|
+ case BTRFS_EXTENT_DATA_KEY:
|
|
+ ret = check_extent_data_item(root, leaf, key, slot);
|
|
+ break;
|
|
+ case BTRFS_EXTENT_CSUM_KEY:
|
|
+ ret = check_csum_item(root, leaf, key, slot);
|
|
+ break;
|
|
+ case BTRFS_DIR_ITEM_KEY:
|
|
+ case BTRFS_DIR_INDEX_KEY:
|
|
+ case BTRFS_XATTR_ITEM_KEY:
|
|
+ ret = check_dir_item(root, leaf, key, slot);
|
|
+ break;
|
|
+ case BTRFS_BLOCK_GROUP_ITEM_KEY:
|
|
+ ret = check_block_group_item(root->fs_info, leaf, key, slot);
|
|
+ break;
|
|
+ }
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
|
|
+ bool check_item_data)
|
|
+{
|
|
+ struct btrfs_fs_info *fs_info = root->fs_info;
|
|
+ /* No valid key type is 0, so all key should be larger than this key */
|
|
+ struct btrfs_key prev_key = {0, 0, 0};
|
|
+ struct btrfs_key key;
|
|
+ u32 nritems = btrfs_header_nritems(leaf);
|
|
+ int slot;
|
|
+
|
|
+ if (btrfs_header_level(leaf) != 0) {
|
|
+ generic_err(root, leaf, 0,
|
|
+ "invalid level for leaf, have %d expect 0",
|
|
+ btrfs_header_level(leaf));
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Extent buffers from a relocation tree have a owner field that
|
|
+ * corresponds to the subvolume tree they are based on. So just from an
|
|
+ * extent buffer alone we can not find out what is the id of the
|
|
+ * corresponding subvolume tree, so we can not figure out if the extent
|
|
+ * buffer corresponds to the root of the relocation tree or not. So
|
|
+ * skip this check for relocation trees.
|
|
+ */
|
|
+ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
|
|
+ u64 owner = btrfs_header_owner(leaf);
|
|
+ struct btrfs_root *check_root;
|
|
+
|
|
+ /* These trees must never be empty */
|
|
+ if (owner == BTRFS_ROOT_TREE_OBJECTID ||
|
|
+ owner == BTRFS_CHUNK_TREE_OBJECTID ||
|
|
+ owner == BTRFS_EXTENT_TREE_OBJECTID ||
|
|
+ owner == BTRFS_DEV_TREE_OBJECTID ||
|
|
+ owner == BTRFS_FS_TREE_OBJECTID ||
|
|
+ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
|
|
+ generic_err(root, leaf, 0,
|
|
+ "invalid root, root %llu must never be empty",
|
|
+ owner);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ key.objectid = owner;
|
|
+ key.type = BTRFS_ROOT_ITEM_KEY;
|
|
+ key.offset = (u64)-1;
|
|
+
|
|
+ check_root = btrfs_get_fs_root(fs_info, &key, false);
|
|
+ /*
|
|
+ * The only reason we also check NULL here is that during
|
|
+ * open_ctree() some roots has not yet been set up.
|
|
+ */
|
|
+ if (!IS_ERR_OR_NULL(check_root)) {
|
|
+ struct extent_buffer *eb;
|
|
+
|
|
+ eb = btrfs_root_node(check_root);
|
|
+ /* if leaf is the root, then it's fine */
|
|
+ if (leaf != eb) {
|
|
+ CORRUPT("non-root leaf's nritems is 0",
|
|
+ leaf, check_root, 0);
|
|
+ free_extent_buffer(eb);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ free_extent_buffer(eb);
|
|
+ }
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ if (nritems == 0)
|
|
+ return 0;
|
|
+
|
|
+ /*
|
|
+ * Check the following things to make sure this is a good leaf, and
|
|
+ * leaf users won't need to bother with similar sanity checks:
|
|
+ *
|
|
+ * 1) key ordering
|
|
+ * 2) item offset and size
|
|
+ * No overlap, no hole, all inside the leaf.
|
|
+ * 3) item content
|
|
+ * If possible, do comprehensive sanity check.
|
|
+ * NOTE: All checks must only rely on the item data itself.
|
|
+ */
|
|
+ for (slot = 0; slot < nritems; slot++) {
|
|
+ u32 item_end_expected;
|
|
+ int ret;
|
|
+
|
|
+ btrfs_item_key_to_cpu(leaf, &key, slot);
|
|
+
|
|
+ /* Make sure the keys are in the right order */
|
|
+ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
|
|
+ CORRUPT("bad key order", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Make sure the offset and ends are right, remember that the
|
|
+ * item data starts at the end of the leaf and grows towards the
|
|
+ * front.
|
|
+ */
|
|
+ if (slot == 0)
|
|
+ item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
|
|
+ else
|
|
+ item_end_expected = btrfs_item_offset_nr(leaf,
|
|
+ slot - 1);
|
|
+ if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
|
|
+ CORRUPT("slot offset bad", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Check to make sure that we don't point outside of the leaf,
|
|
+ * just in case all the items are consistent to each other, but
|
|
+ * all point outside of the leaf.
|
|
+ */
|
|
+ if (btrfs_item_end_nr(leaf, slot) >
|
|
+ BTRFS_LEAF_DATA_SIZE(fs_info)) {
|
|
+ CORRUPT("slot end outside of leaf", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ /* Also check if the item pointer overlaps with btrfs item. */
|
|
+ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
|
|
+ btrfs_item_ptr_offset(leaf, slot)) {
|
|
+ CORRUPT("slot overlap with its data", leaf, root, slot);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ if (check_item_data) {
|
|
+ /*
|
|
+ * Check if the item size and content meet other
|
|
+ * criteria
|
|
+ */
|
|
+ ret = check_leaf_item(root, leaf, &key, slot);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ prev_key.objectid = key.objectid;
|
|
+ prev_key.type = key.type;
|
|
+ prev_key.offset = key.offset;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
|
|
+{
|
|
+ return check_leaf(root, leaf, true);
|
|
+}
|
|
+
|
|
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
|
|
+ struct extent_buffer *leaf)
|
|
+{
|
|
+ return check_leaf(root, leaf, false);
|
|
+}
|
|
+
|
|
+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
|
|
+{
|
|
+ unsigned long nr = btrfs_header_nritems(node);
|
|
+ struct btrfs_key key, next_key;
|
|
+ int slot;
|
|
+ int level = btrfs_header_level(node);
|
|
+ u64 bytenr;
|
|
+ int ret = 0;
|
|
+
|
|
+ if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
|
|
+ generic_err(root, node, 0,
|
|
+ "invalid level for node, have %d expect [1, %d]",
|
|
+ level, BTRFS_MAX_LEVEL - 1);
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
|
|
+ btrfs_crit(root->fs_info,
|
|
+"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
|
|
+ root->objectid, node->start,
|
|
+ nr == 0 ? "small" : "large", nr,
|
|
+ BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
|
|
+ return -EUCLEAN;
|
|
+ }
|
|
+
|
|
+ for (slot = 0; slot < nr - 1; slot++) {
|
|
+ bytenr = btrfs_node_blockptr(node, slot);
|
|
+ btrfs_node_key_to_cpu(node, &key, slot);
|
|
+ btrfs_node_key_to_cpu(node, &next_key, slot + 1);
|
|
+
|
|
+ if (!bytenr) {
|
|
+ generic_err(root, node, slot,
|
|
+ "invalid NULL node pointer");
|
|
+ ret = -EUCLEAN;
|
|
+ goto out;
|
|
+ }
|
|
+ if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
|
|
+ generic_err(root, node, slot,
|
|
+ "unaligned pointer, have %llu should be aligned to %u",
|
|
+ bytenr, root->fs_info->sectorsize);
|
|
+ ret = -EUCLEAN;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
|
|
+ generic_err(root, node, slot,
|
|
+ "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
|
|
+ key.objectid, key.type, key.offset,
|
|
+ next_key.objectid, next_key.type,
|
|
+ next_key.offset);
|
|
+ ret = -EUCLEAN;
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+out:
|
|
+ return ret;
|
|
+}
|
|
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
|
|
new file mode 100644
|
|
index 000000000000..3d53e8d6fda0
|
|
--- /dev/null
|
|
+++ b/fs/btrfs/tree-checker.h
|
|
@@ -0,0 +1,38 @@
|
|
+/*
|
|
+ * Copyright (C) Qu Wenruo 2017. All rights reserved.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public
|
|
+ * License v2 as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public
|
|
+ * License along with this program.
|
|
+ */
|
|
+
|
|
+#ifndef __BTRFS_TREE_CHECKER__
|
|
+#define __BTRFS_TREE_CHECKER__
|
|
+
|
|
+#include "ctree.h"
|
|
+#include "extent_io.h"
|
|
+
|
|
+/*
|
|
+ * Comprehensive leaf checker.
|
|
+ * Will check not only the item pointers, but also every possible member
|
|
+ * in item data.
|
|
+ */
|
|
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
|
|
+
|
|
+/*
|
|
+ * Less strict leaf checker.
|
|
+ * Will only check item pointers, not reading item data.
|
|
+ */
|
|
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
|
|
+ struct extent_buffer *leaf);
|
|
+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
|
|
+
|
|
+#endif
|
|
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
|
|
index a0947f4a3e87..9663b6aa2a56 100644
|
|
--- a/fs/btrfs/volumes.c
|
|
+++ b/fs/btrfs/volumes.c
|
|
@@ -4647,7 +4647,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
|
|
|
if (type & BTRFS_BLOCK_GROUP_DATA) {
|
|
max_stripe_size = SZ_1G;
|
|
- max_chunk_size = 10 * max_stripe_size;
|
|
+ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
|
|
if (!devs_max)
|
|
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
|
|
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
|
|
@@ -6353,6 +6353,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
|
|
u16 num_stripes;
|
|
u16 sub_stripes;
|
|
u64 type;
|
|
+ u64 features;
|
|
+ bool mixed = false;
|
|
|
|
length = btrfs_chunk_length(leaf, chunk);
|
|
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
|
|
@@ -6391,6 +6393,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
|
|
btrfs_chunk_type(leaf, chunk));
|
|
return -EIO;
|
|
}
|
|
+
|
|
+ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
|
|
+ btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
|
|
+ return -EIO;
|
|
+ }
|
|
+
|
|
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
|
|
+ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
|
|
+ btrfs_err(fs_info,
|
|
+ "system chunk with data or metadata type: 0x%llx", type);
|
|
+ return -EIO;
|
|
+ }
|
|
+
|
|
+ features = btrfs_super_incompat_flags(fs_info->super_copy);
|
|
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
|
|
+ mixed = true;
|
|
+
|
|
+ if (!mixed) {
|
|
+ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
|
|
+ (type & BTRFS_BLOCK_GROUP_DATA)) {
|
|
+ btrfs_err(fs_info,
|
|
+ "mixed chunk type in non-mixed mode: 0x%llx", type);
|
|
+ return -EIO;
|
|
+ }
|
|
+ }
|
|
+
|
|
if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
|
|
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
|
|
(type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
|
|
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
|
|
index c5dd48eb7b3d..76fb6e84f201 100644
|
|
--- a/fs/btrfs/volumes.h
|
|
+++ b/fs/btrfs/volumes.h
|
|
@@ -24,6 +24,8 @@
|
|
#include <linux/btrfs.h>
|
|
#include "async-thread.h"
|
|
|
|
+#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
|
|
+
|
|
extern struct mutex uuid_mutex;
|
|
|
|
#define BTRFS_STRIPE_LEN SZ_64K
|
|
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
|
|
index bf378ddca4db..a48984dd6426 100644
|
|
--- a/fs/ceph/mds_client.c
|
|
+++ b/fs/ceph/mds_client.c
|
|
@@ -4079,6 +4079,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
|
|
return auth;
|
|
}
|
|
|
|
+static int add_authorizer_challenge(struct ceph_connection *con,
|
|
+ void *challenge_buf, int challenge_buf_len)
|
|
+{
|
|
+ struct ceph_mds_session *s = con->private;
|
|
+ struct ceph_mds_client *mdsc = s->s_mdsc;
|
|
+ struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
|
|
+
|
|
+ return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
|
|
+ challenge_buf, challenge_buf_len);
|
|
+}
|
|
|
|
static int verify_authorizer_reply(struct ceph_connection *con)
|
|
{
|
|
@@ -4142,6 +4152,7 @@ static const struct ceph_connection_operations mds_con_ops = {
|
|
.put = con_put,
|
|
.dispatch = dispatch,
|
|
.get_authorizer = get_authorizer,
|
|
+ .add_authorizer_challenge = add_authorizer_challenge,
|
|
.verify_authorizer_reply = verify_authorizer_reply,
|
|
.invalidate_authorizer = invalidate_authorizer,
|
|
.peer_reset = peer_reset,
|
|
diff --git a/fs/direct-io.c b/fs/direct-io.c
|
|
index 625a84aa6484..40567501015f 100644
|
|
--- a/fs/direct-io.c
|
|
+++ b/fs/direct-io.c
|
|
@@ -304,8 +304,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
|
|
*/
|
|
dio->iocb->ki_pos += transferred;
|
|
|
|
- if (dio->op == REQ_OP_WRITE)
|
|
- ret = generic_write_sync(dio->iocb, transferred);
|
|
+ if (ret > 0 && dio->op == REQ_OP_WRITE)
|
|
+ ret = generic_write_sync(dio->iocb, ret);
|
|
dio->iocb->ki_complete(dio->iocb, ret, 0);
|
|
}
|
|
|
|
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
|
|
index 62d9a659a8ff..dd8f10db82e9 100644
|
|
--- a/fs/ext2/xattr.c
|
|
+++ b/fs/ext2/xattr.c
|
|
@@ -612,9 +612,9 @@ skip_replace:
|
|
}
|
|
|
|
cleanup:
|
|
- brelse(bh);
|
|
if (!(bh && header == HDR(bh)))
|
|
kfree(header);
|
|
+ brelse(bh);
|
|
up_write(&EXT2_I(inode)->xattr_sem);
|
|
|
|
return error;
|
|
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
|
|
index 41fce930f44c..624817eeb25e 100644
|
|
--- a/fs/f2fs/checkpoint.c
|
|
+++ b/fs/f2fs/checkpoint.c
|
|
@@ -69,6 +69,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
|
|
.old_blkaddr = index,
|
|
.new_blkaddr = index,
|
|
.encrypted_page = NULL,
|
|
+ .is_meta = is_meta,
|
|
};
|
|
|
|
if (unlikely(!is_meta))
|
|
@@ -85,8 +86,10 @@ repeat:
|
|
fio.page = page;
|
|
|
|
if (f2fs_submit_page_bio(&fio)) {
|
|
- f2fs_put_page(page, 1);
|
|
- goto repeat;
|
|
+ memset(page_address(page), 0, PAGE_SIZE);
|
|
+ f2fs_stop_checkpoint(sbi, false);
|
|
+ f2fs_bug_on(sbi, 1);
|
|
+ return page;
|
|
}
|
|
|
|
lock_page(page);
|
|
@@ -117,7 +120,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
|
|
return __get_meta_page(sbi, index, false);
|
|
}
|
|
|
|
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
|
|
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
|
|
+ block_t blkaddr, int type)
|
|
{
|
|
switch (type) {
|
|
case META_NAT:
|
|
@@ -137,8 +141,20 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
|
|
return false;
|
|
break;
|
|
case META_POR:
|
|
+ case DATA_GENERIC:
|
|
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
|
|
- blkaddr < MAIN_BLKADDR(sbi)))
|
|
+ blkaddr < MAIN_BLKADDR(sbi))) {
|
|
+ if (type == DATA_GENERIC) {
|
|
+ f2fs_msg(sbi->sb, KERN_WARNING,
|
|
+ "access invalid blkaddr:%u", blkaddr);
|
|
+ WARN_ON(1);
|
|
+ }
|
|
+ return false;
|
|
+ }
|
|
+ break;
|
|
+ case META_GENERIC:
|
|
+ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
|
|
+ blkaddr >= MAIN_BLKADDR(sbi)))
|
|
return false;
|
|
break;
|
|
default:
|
|
@@ -163,6 +179,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
|
|
.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
|
|
.encrypted_page = NULL,
|
|
.in_list = false,
|
|
+ .is_meta = (type != META_POR),
|
|
};
|
|
struct blk_plug plug;
|
|
|
|
@@ -172,7 +189,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
|
|
blk_start_plug(&plug);
|
|
for (; nrpages-- > 0; blkno++) {
|
|
|
|
- if (!is_valid_blkaddr(sbi, blkno, type))
|
|
+ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
|
|
goto out;
|
|
|
|
switch (type) {
|
|
@@ -737,6 +754,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
|
|
&cp_page_1, version);
|
|
if (err)
|
|
return NULL;
|
|
+
|
|
+ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
|
|
+ sbi->blocks_per_seg) {
|
|
+ f2fs_msg(sbi->sb, KERN_WARNING,
|
|
+ "invalid cp_pack_total_block_count:%u",
|
|
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
|
|
+ goto invalid_cp;
|
|
+ }
|
|
pre_version = *version;
|
|
|
|
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
|
|
@@ -800,15 +825,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
|
|
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
|
|
memcpy(sbi->ckpt, cp_block, blk_size);
|
|
|
|
- /* Sanity checking of checkpoint */
|
|
- if (sanity_check_ckpt(sbi))
|
|
- goto free_fail_no_cp;
|
|
-
|
|
if (cur_page == cp1)
|
|
sbi->cur_cp_pack = 1;
|
|
else
|
|
sbi->cur_cp_pack = 2;
|
|
|
|
+ /* Sanity checking of checkpoint */
|
|
+ if (sanity_check_ckpt(sbi))
|
|
+ goto free_fail_no_cp;
|
|
+
|
|
if (cp_blks <= 1)
|
|
goto done;
|
|
|
|
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
|
|
index 6fbb6d75318a..8f6e7c3a10f8 100644
|
|
--- a/fs/f2fs/data.c
|
|
+++ b/fs/f2fs/data.c
|
|
@@ -369,6 +369,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
|
|
struct page *page = fio->encrypted_page ?
|
|
fio->encrypted_page : fio->page;
|
|
|
|
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
|
|
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
|
|
+ return -EFAULT;
|
|
+
|
|
trace_f2fs_submit_page_bio(page, fio);
|
|
f2fs_trace_ios(fio, 0);
|
|
|
|
@@ -412,9 +416,9 @@ next:
|
|
spin_unlock(&io->io_lock);
|
|
}
|
|
|
|
- if (fio->old_blkaddr != NEW_ADDR)
|
|
- verify_block_addr(sbi, fio->old_blkaddr);
|
|
- verify_block_addr(sbi, fio->new_blkaddr);
|
|
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
|
|
+ verify_block_addr(fio, fio->old_blkaddr);
|
|
+ verify_block_addr(fio, fio->new_blkaddr);
|
|
|
|
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
|
|
|
|
@@ -945,7 +949,13 @@ next_dnode:
|
|
next_block:
|
|
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
|
|
|
|
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
|
|
+ if (__is_valid_data_blkaddr(blkaddr) &&
|
|
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
|
|
+ err = -EFAULT;
|
|
+ goto sync_out;
|
|
+ }
|
|
+
|
|
+ if (!is_valid_data_blkaddr(sbi, blkaddr)) {
|
|
if (create) {
|
|
if (unlikely(f2fs_cp_error(sbi))) {
|
|
err = -EIO;
|
|
@@ -1263,6 +1273,10 @@ got_it:
|
|
SetPageUptodate(page);
|
|
goto confused;
|
|
}
|
|
+
|
|
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
|
|
+ DATA_GENERIC))
|
|
+ goto set_error_page;
|
|
} else {
|
|
zero_user_segment(page, 0, PAGE_SIZE);
|
|
if (!PageUptodate(page))
|
|
@@ -1387,15 +1401,6 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
|
|
return need_inplace_update_policy(inode, fio);
|
|
}
|
|
|
|
-static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
|
|
-{
|
|
- if (fio->old_blkaddr == NEW_ADDR)
|
|
- return false;
|
|
- if (fio->old_blkaddr == NULL_ADDR)
|
|
- return false;
|
|
- return true;
|
|
-}
|
|
-
|
|
int do_write_data_page(struct f2fs_io_info *fio)
|
|
{
|
|
struct page *page = fio->page;
|
|
@@ -1410,11 +1415,13 @@ int do_write_data_page(struct f2fs_io_info *fio)
|
|
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
|
|
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
|
|
|
|
- if (valid_ipu_blkaddr(fio)) {
|
|
- ipu_force = true;
|
|
- fio->need_lock = LOCK_DONE;
|
|
- goto got_it;
|
|
- }
|
|
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
|
|
+ DATA_GENERIC))
|
|
+ return -EFAULT;
|
|
+
|
|
+ ipu_force = true;
|
|
+ fio->need_lock = LOCK_DONE;
|
|
+ goto got_it;
|
|
}
|
|
|
|
/* Deadlock due to between page->lock and f2fs_lock_op */
|
|
@@ -1433,11 +1440,18 @@ int do_write_data_page(struct f2fs_io_info *fio)
|
|
goto out_writepage;
|
|
}
|
|
got_it:
|
|
+ if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
|
|
+ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
|
|
+ DATA_GENERIC)) {
|
|
+ err = -EFAULT;
|
|
+ goto out_writepage;
|
|
+ }
|
|
/*
|
|
* If current allocation needs SSR,
|
|
* it had better in-place writes for updated data.
|
|
*/
|
|
- if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
|
|
+ if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
|
|
+ need_inplace_update(fio))) {
|
|
err = encrypt_one_page(fio);
|
|
if (err)
|
|
goto out_writepage;
|
|
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
|
|
index 54f8520ad7a2..3f1a44696036 100644
|
|
--- a/fs/f2fs/f2fs.h
|
|
+++ b/fs/f2fs/f2fs.h
|
|
@@ -162,7 +162,7 @@ struct cp_control {
|
|
};
|
|
|
|
/*
|
|
- * For CP/NAT/SIT/SSA readahead
|
|
+ * indicate meta/data type
|
|
*/
|
|
enum {
|
|
META_CP,
|
|
@@ -170,6 +170,8 @@ enum {
|
|
META_SIT,
|
|
META_SSA,
|
|
META_POR,
|
|
+ DATA_GENERIC,
|
|
+ META_GENERIC,
|
|
};
|
|
|
|
/* for the list of ino */
|
|
@@ -910,6 +912,7 @@ struct f2fs_io_info {
|
|
bool submitted; /* indicate IO submission */
|
|
int need_lock; /* indicate we need to lock cp_rwsem */
|
|
bool in_list; /* indicate fio is in io_list */
|
|
+ bool is_meta; /* indicate borrow meta inode mapping or not */
|
|
enum iostat_type io_type; /* io type */
|
|
};
|
|
|
|
@@ -2354,6 +2357,39 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
|
|
spin_unlock(&sbi->iostat_lock);
|
|
}
|
|
|
|
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
|
|
+ (!is_read_io(fio->op) || fio->is_meta))
|
|
+
|
|
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
|
|
+ block_t blkaddr, int type);
|
|
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
|
|
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
|
|
+ block_t blkaddr, int type)
|
|
+{
|
|
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
|
|
+ f2fs_msg(sbi->sb, KERN_ERR,
|
|
+ "invalid blkaddr: %u, type: %d, run fsck to fix.",
|
|
+ blkaddr, type);
|
|
+ f2fs_bug_on(sbi, 1);
|
|
+ }
|
|
+}
|
|
+
|
|
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
|
|
+{
|
|
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
|
|
+ return false;
|
|
+ return true;
|
|
+}
|
|
+
|
|
+static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
|
|
+ block_t blkaddr)
|
|
+{
|
|
+ if (!__is_valid_data_blkaddr(blkaddr))
|
|
+ return false;
|
|
+ verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
|
|
+ return true;
|
|
+}
|
|
+
|
|
/*
|
|
* file.c
|
|
*/
|
|
@@ -2564,7 +2600,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
|
|
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
|
|
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
|
|
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
|
|
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
|
|
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
|
|
+ block_t blkaddr, int type);
|
|
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
|
|
int type, bool sync);
|
|
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
|
|
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
|
|
index 6f589730782d..7d3189f1941c 100644
|
|
--- a/fs/f2fs/file.c
|
|
+++ b/fs/f2fs/file.c
|
|
@@ -328,13 +328,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
|
|
return pgofs;
|
|
}
|
|
|
|
-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
|
|
- int whence)
|
|
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
|
|
+ pgoff_t dirty, pgoff_t pgofs, int whence)
|
|
{
|
|
switch (whence) {
|
|
case SEEK_DATA:
|
|
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
|
|
- (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
|
|
+ is_valid_data_blkaddr(sbi, blkaddr))
|
|
return true;
|
|
break;
|
|
case SEEK_HOLE:
|
|
@@ -397,7 +397,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
|
|
blkaddr = datablock_addr(dn.inode,
|
|
dn.node_page, dn.ofs_in_node);
|
|
|
|
- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
|
|
+ if (__is_valid_data_blkaddr(blkaddr) &&
|
|
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
|
|
+ blkaddr, DATA_GENERIC)) {
|
|
+ f2fs_put_dnode(&dn);
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
|
|
+ pgofs, whence)) {
|
|
f2fs_put_dnode(&dn);
|
|
goto found;
|
|
}
|
|
@@ -495,6 +503,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
|
|
|
|
dn->data_blkaddr = NULL_ADDR;
|
|
set_data_blkaddr(dn);
|
|
+
|
|
+ if (__is_valid_data_blkaddr(blkaddr) &&
|
|
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
|
|
+ continue;
|
|
+
|
|
invalidate_blocks(sbi, blkaddr);
|
|
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
|
|
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
|
|
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
|
|
index 259b0aa283f0..9a40724dbaa6 100644
|
|
--- a/fs/f2fs/inode.c
|
|
+++ b/fs/f2fs/inode.c
|
|
@@ -62,11 +62,12 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
|
|
}
|
|
}
|
|
|
|
-static bool __written_first_block(struct f2fs_inode *ri)
|
|
+static bool __written_first_block(struct f2fs_sb_info *sbi,
|
|
+ struct f2fs_inode *ri)
|
|
{
|
|
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
|
|
|
|
- if (addr != NEW_ADDR && addr != NULL_ADDR)
|
|
+ if (is_valid_data_blkaddr(sbi, addr))
|
|
return true;
|
|
return false;
|
|
}
|
|
@@ -179,6 +180,72 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
|
|
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
|
|
}
|
|
|
|
+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
|
|
+{
|
|
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
+ struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
+ unsigned long long iblocks;
|
|
+
|
|
+ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
|
|
+ if (!iblocks) {
|
|
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
+ f2fs_msg(sbi->sb, KERN_WARNING,
|
|
+ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
|
|
+ "run fsck to fix.",
|
|
+ __func__, inode->i_ino, iblocks);
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (ino_of_node(node_page) != nid_of_node(node_page)) {
|
|
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
+ f2fs_msg(sbi->sb, KERN_WARNING,
|
|
+ "%s: corrupted inode footer i_ino=%lx, ino,nid: "
|
|
+ "[%u, %u] run fsck to fix.",
|
|
+ __func__, inode->i_ino,
|
|
+ ino_of_node(node_page), nid_of_node(node_page));
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (f2fs_has_extra_attr(inode) &&
|
|
+ !f2fs_sb_has_extra_attr(sbi->sb)) {
|
|
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
+ f2fs_msg(sbi->sb, KERN_WARNING,
|
|
+ "%s: inode (ino=%lx) is with extra_attr, "
|
|
+ "but extra_attr feature is off",
|
|
+ __func__, inode->i_ino);
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
|
|
+ fi->i_extra_isize % sizeof(__le32)) {
|
|
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
+ f2fs_msg(sbi->sb, KERN_WARNING,
|
|
+ "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
|
|
+ "max: %zu",
|
|
+ __func__, inode->i_ino, fi->i_extra_isize,
|
|
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (F2FS_I(inode)->extent_tree) {
|
|
+ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
|
|
+
|
|
+ if (ei->len &&
|
|
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
|
|
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
|
|
+ DATA_GENERIC))) {
|
|
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
+ f2fs_msg(sbi->sb, KERN_WARNING,
|
|
+ "%s: inode (ino=%lx) extent info [%u, %u, %u] "
|
|
+ "is incorrect, run fsck to fix",
|
|
+ __func__, inode->i_ino,
|
|
+ ei->blk, ei->fofs, ei->len);
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
static int do_read_inode(struct inode *inode)
|
|
{
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
@@ -228,6 +295,11 @@ static int do_read_inode(struct inode *inode)
|
|
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
|
|
le16_to_cpu(ri->i_extra_isize) : 0;
|
|
|
|
+ if (!sanity_check_inode(inode, node_page)) {
|
|
+ f2fs_put_page(node_page, 1);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
/* check data exist */
|
|
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
|
|
__recover_inline_status(inode, node_page);
|
|
@@ -235,7 +307,7 @@ static int do_read_inode(struct inode *inode)
|
|
/* get rdev by using inline_info */
|
|
__get_inode_rdev(inode, ri);
|
|
|
|
- if (__written_first_block(ri))
|
|
+ if (__written_first_block(sbi, ri))
|
|
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
|
|
|
|
if (!need_inode_block_update(sbi, inode->i_ino))
|
|
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
|
|
index 712505ec5de4..65de72d65562 100644
|
|
--- a/fs/f2fs/node.c
|
|
+++ b/fs/f2fs/node.c
|
|
@@ -334,8 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
|
|
new_blkaddr == NULL_ADDR);
|
|
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
|
|
new_blkaddr == NEW_ADDR);
|
|
- f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
|
|
- nat_get_blkaddr(e) != NULL_ADDR &&
|
|
+ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
|
|
new_blkaddr == NEW_ADDR);
|
|
|
|
/* increment version no as node is removed */
|
|
@@ -350,7 +349,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
|
|
|
|
/* change address */
|
|
nat_set_blkaddr(e, new_blkaddr);
|
|
- if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
|
|
+ if (!is_valid_data_blkaddr(sbi, new_blkaddr))
|
|
set_nat_flag(e, IS_CHECKPOINTED, false);
|
|
__set_nat_cache_dirty(nm_i, e);
|
|
|
|
@@ -1399,6 +1398,12 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
|
|
return 0;
|
|
}
|
|
|
|
+ if (__is_valid_data_blkaddr(ni.blk_addr) &&
|
|
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
|
|
+ up_read(&sbi->node_write);
|
|
+ goto redirty_out;
|
|
+ }
|
|
+
|
|
if (atomic && !test_opt(sbi, NOBARRIER))
|
|
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
|
|
|
|
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
|
|
index 765fadf954af..6ea445377767 100644
|
|
--- a/fs/f2fs/recovery.c
|
|
+++ b/fs/f2fs/recovery.c
|
|
@@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
|
|
while (1) {
|
|
struct fsync_inode_entry *entry;
|
|
|
|
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
|
|
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
|
|
return 0;
|
|
|
|
page = get_tmp_page(sbi, blkaddr);
|
|
@@ -479,7 +479,7 @@ retry_dn:
|
|
}
|
|
|
|
/* dest is valid block, try to recover from src to dest */
|
|
- if (is_valid_blkaddr(sbi, dest, META_POR)) {
|
|
+ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
|
|
|
|
if (src == NULL_ADDR) {
|
|
err = reserve_new_block(&dn);
|
|
@@ -540,7 +540,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
|
|
while (1) {
|
|
struct fsync_inode_entry *entry;
|
|
|
|
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
|
|
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
|
|
break;
|
|
|
|
ra_meta_pages_cond(sbi, blkaddr);
|
|
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
|
|
index 3c7bbbae0afa..5c698757e116 100644
|
|
--- a/fs/f2fs/segment.c
|
|
+++ b/fs/f2fs/segment.c
|
|
@@ -1758,7 +1758,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
|
|
struct seg_entry *se;
|
|
bool is_cp = false;
|
|
|
|
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
|
|
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
|
|
return true;
|
|
|
|
mutex_lock(&sit_i->sentry_lock);
|
|
@@ -2571,7 +2571,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
|
|
{
|
|
struct page *cpage;
|
|
|
|
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
|
|
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
|
|
return;
|
|
|
|
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
|
|
@@ -3304,6 +3304,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
|
|
unsigned int old_valid_blocks;
|
|
|
|
start = le32_to_cpu(segno_in_journal(journal, i));
|
|
+ if (start >= MAIN_SEGS(sbi)) {
|
|
+ f2fs_msg(sbi->sb, KERN_ERR,
|
|
+ "Wrong journal entry on segno %u",
|
|
+ start);
|
|
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
+ err = -EINVAL;
|
|
+ break;
|
|
+ }
|
|
+
|
|
se = &sit_i->sentries[start];
|
|
sit = sit_in_journal(journal, i);
|
|
|
|
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
|
|
index 4dfb5080098f..47348d98165b 100644
|
|
--- a/fs/f2fs/segment.h
|
|
+++ b/fs/f2fs/segment.h
|
|
@@ -53,13 +53,19 @@
|
|
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
|
|
(sbi)->segs_per_sec)) \
|
|
|
|
-#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
|
|
-#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
|
|
+#define MAIN_BLKADDR(sbi) \
|
|
+ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
|
|
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
|
|
+#define SEG0_BLKADDR(sbi) \
|
|
+ (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \
|
|
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr))
|
|
|
|
#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
|
|
#define MAIN_SECS(sbi) ((sbi)->total_sections)
|
|
|
|
-#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
|
|
+#define TOTAL_SEGS(sbi) \
|
|
+ (SM_I(sbi) ? SM_I(sbi)->segment_count : \
|
|
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
|
|
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
|
|
|
|
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
|
|
@@ -79,7 +85,7 @@
|
|
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
|
|
|
|
#define GET_SEGNO(sbi, blk_addr) \
|
|
- ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \
|
|
+ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
|
|
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
|
|
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
|
|
#define BLKS_PER_SEC(sbi) \
|
|
@@ -619,10 +625,14 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
|
|
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
|
|
}
|
|
|
|
-static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
|
|
+static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
|
|
{
|
|
- BUG_ON(blk_addr < SEG0_BLKADDR(sbi)
|
|
- || blk_addr >= MAX_BLKADDR(sbi));
|
|
+ struct f2fs_sb_info *sbi = fio->sbi;
|
|
+
|
|
+ if (__is_meta_io(fio))
|
|
+ verify_blkaddr(sbi, blk_addr, META_GENERIC);
|
|
+ else
|
|
+ verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
|
|
}
|
|
|
|
/*
|
|
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
|
|
index 7cda685296b2..de4de4ebe64c 100644
|
|
--- a/fs/f2fs/super.c
|
|
+++ b/fs/f2fs/super.c
|
|
@@ -1807,6 +1807,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
|
|
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
|
|
struct buffer_head *bh)
|
|
{
|
|
+ block_t segment_count, segs_per_sec, secs_per_zone;
|
|
+ block_t total_sections, blocks_per_seg;
|
|
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
|
|
(bh->b_data + F2FS_SUPER_OFFSET);
|
|
struct super_block *sb = sbi->sb;
|
|
@@ -1863,6 +1865,68 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
|
|
return 1;
|
|
}
|
|
|
|
+ segment_count = le32_to_cpu(raw_super->segment_count);
|
|
+ segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
|
|
+ secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
|
|
+ total_sections = le32_to_cpu(raw_super->section_count);
|
|
+
|
|
+ /* blocks_per_seg should be 512, given the above check */
|
|
+ blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
|
|
+
|
|
+ if (segment_count > F2FS_MAX_SEGMENT ||
|
|
+ segment_count < F2FS_MIN_SEGMENTS) {
|
|
+ f2fs_msg(sb, KERN_INFO,
|
|
+ "Invalid segment count (%u)",
|
|
+ segment_count);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if (total_sections > segment_count ||
|
|
+ total_sections < F2FS_MIN_SEGMENTS ||
|
|
+ segs_per_sec > segment_count || !segs_per_sec) {
|
|
+ f2fs_msg(sb, KERN_INFO,
|
|
+ "Invalid segment/section count (%u, %u x %u)",
|
|
+ segment_count, total_sections, segs_per_sec);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if ((segment_count / segs_per_sec) < total_sections) {
|
|
+ f2fs_msg(sb, KERN_INFO,
|
|
+ "Small segment_count (%u < %u * %u)",
|
|
+ segment_count, segs_per_sec, total_sections);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
|
|
+ f2fs_msg(sb, KERN_INFO,
|
|
+ "Wrong segment_count / block_count (%u > %u)",
|
|
+ segment_count, le32_to_cpu(raw_super->block_count));
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if (secs_per_zone > total_sections || !secs_per_zone) {
|
|
+ f2fs_msg(sb, KERN_INFO,
|
|
+ "Wrong secs_per_zone / total_sections (%u, %u)",
|
|
+ secs_per_zone, total_sections);
|
|
+ return 1;
|
|
+ }
|
|
+ if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) {
|
|
+ f2fs_msg(sb, KERN_INFO,
|
|
+ "Corrupted extension count (%u > %u)",
|
|
+ le32_to_cpu(raw_super->extension_count),
|
|
+ F2FS_MAX_EXTENSION);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if (le32_to_cpu(raw_super->cp_payload) >
|
|
+ (blocks_per_seg - F2FS_CP_PACKS)) {
|
|
+ f2fs_msg(sb, KERN_INFO,
|
|
+ "Insane cp_payload (%u > %u)",
|
|
+ le32_to_cpu(raw_super->cp_payload),
|
|
+ blocks_per_seg - F2FS_CP_PACKS);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
/* check reserved ino info */
|
|
if (le32_to_cpu(raw_super->node_ino) != 1 ||
|
|
le32_to_cpu(raw_super->meta_ino) != 2 ||
|
|
@@ -1875,13 +1939,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
|
|
return 1;
|
|
}
|
|
|
|
- if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
|
|
- f2fs_msg(sb, KERN_INFO,
|
|
- "Invalid segment count (%u)",
|
|
- le32_to_cpu(raw_super->segment_count));
|
|
- return 1;
|
|
- }
|
|
-
|
|
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
|
|
if (sanity_check_area_boundary(sbi, bh))
|
|
return 1;
|
|
@@ -1899,6 +1956,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
|
|
unsigned int sit_segs, nat_segs;
|
|
unsigned int sit_bitmap_size, nat_bitmap_size;
|
|
unsigned int log_blocks_per_seg;
|
|
+ unsigned int segment_count_main;
|
|
+ unsigned int cp_pack_start_sum, cp_payload;
|
|
+ block_t user_block_count;
|
|
int i;
|
|
|
|
total = le32_to_cpu(raw_super->segment_count);
|
|
@@ -1923,6 +1983,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
|
|
return 1;
|
|
}
|
|
|
|
+ user_block_count = le64_to_cpu(ckpt->user_block_count);
|
|
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
|
|
+ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
|
|
+ if (!user_block_count || user_block_count >=
|
|
+ segment_count_main << log_blocks_per_seg) {
|
|
+ f2fs_msg(sbi->sb, KERN_ERR,
|
|
+ "Wrong user_block_count: %u", user_block_count);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
main_segs = le32_to_cpu(raw_super->segment_count_main);
|
|
blocks_per_seg = sbi->blocks_per_seg;
|
|
|
|
@@ -1939,7 +2009,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
|
|
|
|
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
|
|
nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
|
|
- log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
|
|
|
|
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
|
|
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
|
|
@@ -1949,6 +2018,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
|
|
return 1;
|
|
}
|
|
|
|
+ cp_pack_start_sum = __start_sum_addr(sbi);
|
|
+ cp_payload = __cp_payload(sbi);
|
|
+ if (cp_pack_start_sum < cp_payload + 1 ||
|
|
+ cp_pack_start_sum > blocks_per_seg - 1 -
|
|
+ NR_CURSEG_TYPE) {
|
|
+ f2fs_msg(sbi->sb, KERN_ERR,
|
|
+ "Wrong cp_pack_start_sum: %u",
|
|
+ cp_pack_start_sum);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
if (unlikely(f2fs_cp_error(sbi))) {
|
|
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
|
|
return 1;
|
|
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
|
|
index 6249c92671de..ea66f04f46f7 100644
|
|
--- a/fs/xfs/libxfs/xfs_attr.c
|
|
+++ b/fs/xfs/libxfs/xfs_attr.c
|
|
@@ -501,7 +501,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
|
|
if (args->flags & ATTR_CREATE)
|
|
return retval;
|
|
retval = xfs_attr_shortform_remove(args);
|
|
- ASSERT(retval == 0);
|
|
+ if (retval)
|
|
+ return retval;
|
|
+ /*
|
|
+ * Since we have removed the old attr, clear ATTR_REPLACE so
|
|
+ * that the leaf format add routine won't trip over the attr
|
|
+ * not being around.
|
|
+ */
|
|
+ args->flags &= ~ATTR_REPLACE;
|
|
}
|
|
|
|
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
|
|
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
|
|
index a3333004fd2b..8458cc5fbce5 100644
|
|
--- a/include/linux/bpf_verifier.h
|
|
+++ b/include/linux/bpf_verifier.h
|
|
@@ -113,6 +113,7 @@ struct bpf_insn_aux_data {
|
|
struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
|
|
};
|
|
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
|
|
+ int sanitize_stack_off; /* stack slot to be cleared */
|
|
bool seen; /* this insn was processed by the verifier */
|
|
};
|
|
|
|
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
|
|
index e931da8424a4..6728c2ee0205 100644
|
|
--- a/include/linux/ceph/auth.h
|
|
+++ b/include/linux/ceph/auth.h
|
|
@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
|
|
/* ensure that an existing authorizer is up to date */
|
|
int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
|
|
struct ceph_auth_handshake *auth);
|
|
+ int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
|
|
+ struct ceph_authorizer *a,
|
|
+ void *challenge_buf,
|
|
+ int challenge_buf_len);
|
|
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
|
|
struct ceph_authorizer *a);
|
|
void (*invalidate_authorizer)(struct ceph_auth_client *ac,
|
|
@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
|
|
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
|
|
int peer_type,
|
|
struct ceph_auth_handshake *a);
|
|
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
|
|
+ struct ceph_authorizer *a,
|
|
+ void *challenge_buf,
|
|
+ int challenge_buf_len);
|
|
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
|
|
struct ceph_authorizer *a);
|
|
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
|
|
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
|
|
index 59042d5ac520..70f42eef813b 100644
|
|
--- a/include/linux/ceph/ceph_features.h
|
|
+++ b/include/linux/ceph/ceph_features.h
|
|
@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
|
|
DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
|
|
DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
|
|
DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
|
|
-DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit*
|
|
+DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
|
|
+DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit*
|
|
|
|
-DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down!
|
|
DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal
|
|
DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
|
|
|
|
@@ -209,7 +209,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
|
|
CEPH_FEATURE_SERVER_JEWEL | \
|
|
CEPH_FEATURE_MON_STATEFUL_SUB | \
|
|
CEPH_FEATURE_CRUSH_TUNABLES5 | \
|
|
- CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
|
|
+ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
|
|
+ CEPH_FEATURE_CEPHX_V2)
|
|
|
|
#define CEPH_FEATURES_REQUIRED_DEFAULT \
|
|
(CEPH_FEATURE_NOSRCADDR | \
|
|
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
|
|
index ead9d85f1c11..18fbe910ed55 100644
|
|
--- a/include/linux/ceph/messenger.h
|
|
+++ b/include/linux/ceph/messenger.h
|
|
@@ -31,6 +31,9 @@ struct ceph_connection_operations {
|
|
struct ceph_auth_handshake *(*get_authorizer) (
|
|
struct ceph_connection *con,
|
|
int *proto, int force_new);
|
|
+ int (*add_authorizer_challenge)(struct ceph_connection *con,
|
|
+ void *challenge_buf,
|
|
+ int challenge_buf_len);
|
|
int (*verify_authorizer_reply) (struct ceph_connection *con);
|
|
int (*invalidate_authorizer)(struct ceph_connection *con);
|
|
|
|
@@ -203,9 +206,8 @@ struct ceph_connection {
|
|
attempt for this connection, client */
|
|
u32 peer_global_seq; /* peer's global seq for this connection */
|
|
|
|
+ struct ceph_auth_handshake *auth;
|
|
int auth_retry; /* true if we need a newer authorizer */
|
|
- void *auth_reply_buf; /* where to put the authorizer reply */
|
|
- int auth_reply_buf_len;
|
|
|
|
struct mutex mutex;
|
|
|
|
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
|
|
index 73ae2a926548..9e50aede46c8 100644
|
|
--- a/include/linux/ceph/msgr.h
|
|
+++ b/include/linux/ceph/msgr.h
|
|
@@ -91,7 +91,7 @@ struct ceph_entity_inst {
|
|
#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
|
|
#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */
|
|
#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
|
|
-
|
|
+#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */
|
|
|
|
/*
|
|
* connection negotiation
|
|
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
|
|
index 3b7675bcca64..cd0d2270998f 100644
|
|
--- a/include/linux/jump_label.h
|
|
+++ b/include/linux/jump_label.h
|
|
@@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
|
|
extern int jump_label_text_reserved(void *start, void *end);
|
|
extern void static_key_slow_inc(struct static_key *key);
|
|
extern void static_key_slow_dec(struct static_key *key);
|
|
+extern void static_key_slow_inc_cpuslocked(struct static_key *key);
|
|
+extern void static_key_slow_dec_cpuslocked(struct static_key *key);
|
|
extern void jump_label_apply_nops(struct module *mod);
|
|
extern int static_key_count(struct static_key *key);
|
|
extern void static_key_enable(struct static_key *key);
|
|
@@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key)
|
|
atomic_dec(&key->enabled);
|
|
}
|
|
|
|
+#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key)
|
|
+#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key)
|
|
+
|
|
static inline int jump_label_text_reserved(void *start, void *end)
|
|
{
|
|
return 0;
|
|
@@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void);
|
|
|
|
#define static_branch_inc(x) static_key_slow_inc(&(x)->key)
|
|
#define static_branch_dec(x) static_key_slow_dec(&(x)->key)
|
|
+#define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key)
|
|
+#define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key)
|
|
|
|
/*
|
|
* Normal usage; boolean enable/disable.
|
|
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
|
|
index 919b2a0b0307..38342e88b3f3 100644
|
|
--- a/include/linux/ptrace.h
|
|
+++ b/include/linux/ptrace.h
|
|
@@ -62,8 +62,8 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
|
|
#define PTRACE_MODE_READ 0x01
|
|
#define PTRACE_MODE_ATTACH 0x02
|
|
#define PTRACE_MODE_NOAUDIT 0x04
|
|
-#define PTRACE_MODE_FSCREDS 0x08
|
|
-#define PTRACE_MODE_REALCREDS 0x10
|
|
+#define PTRACE_MODE_FSCREDS 0x08
|
|
+#define PTRACE_MODE_REALCREDS 0x10
|
|
|
|
/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
|
|
#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
|
index e04919aa8201..866439c361a9 100644
|
|
--- a/include/linux/sched.h
|
|
+++ b/include/linux/sched.h
|
|
@@ -1405,6 +1405,8 @@ static inline bool is_percpu_thread(void)
|
|
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
|
|
#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
|
|
#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
|
|
+#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
|
|
+#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
|
|
|
|
#define TASK_PFA_TEST(name, func) \
|
|
static inline bool task_##func(struct task_struct *p) \
|
|
@@ -1436,6 +1438,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
|
|
TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
|
|
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
|
|
|
|
+TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
|
|
+TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
|
|
+TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
|
|
+
|
|
+TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
|
|
+TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
|
|
+
|
|
static inline void
|
|
current_restore_flags(unsigned long orig_flags, unsigned long flags)
|
|
{
|
|
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
|
|
new file mode 100644
|
|
index 000000000000..59d3736c454c
|
|
--- /dev/null
|
|
+++ b/include/linux/sched/smt.h
|
|
@@ -0,0 +1,20 @@
|
|
+/* SPDX-License-Identifier: GPL-2.0 */
|
|
+#ifndef _LINUX_SCHED_SMT_H
|
|
+#define _LINUX_SCHED_SMT_H
|
|
+
|
|
+#include <linux/static_key.h>
|
|
+
|
|
+#ifdef CONFIG_SCHED_SMT
|
|
+extern struct static_key_false sched_smt_present;
|
|
+
|
|
+static __always_inline bool sched_smt_active(void)
|
|
+{
|
|
+ return static_branch_likely(&sched_smt_present);
|
|
+}
|
|
+#else
|
|
+static inline bool sched_smt_active(void) { return false; }
|
|
+#endif
|
|
+
|
|
+void arch_smt_update(void);
|
|
+
|
|
+#endif
|
|
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
|
|
index f64e88444082..f6250555ce7d 100644
|
|
--- a/include/linux/skbuff.h
|
|
+++ b/include/linux/skbuff.h
|
|
@@ -1288,6 +1288,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
|
|
}
|
|
}
|
|
|
|
+static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
|
|
+{
|
|
+ skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
|
|
+ skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
|
|
+}
|
|
+
|
|
+static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
|
|
+{
|
|
+ return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
|
|
+}
|
|
+
|
|
+static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
|
|
+{
|
|
+ return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
|
|
+}
|
|
+
|
|
/* Release a reference on a zerocopy structure */
|
|
static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
|
|
{
|
|
@@ -1297,7 +1313,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
|
|
if (uarg->callback == sock_zerocopy_callback) {
|
|
uarg->zerocopy = uarg->zerocopy && zerocopy;
|
|
sock_zerocopy_put(uarg);
|
|
- } else {
|
|
+ } else if (!skb_zcopy_is_nouarg(skb)) {
|
|
uarg->callback(uarg, zerocopy);
|
|
}
|
|
|
|
diff --git a/include/net/tls.h b/include/net/tls.h
|
|
index 86ed3dd80fe7..604fd982da19 100644
|
|
--- a/include/net/tls.h
|
|
+++ b/include/net/tls.h
|
|
@@ -89,6 +89,8 @@ struct tls_context {
|
|
|
|
void *priv_ctx;
|
|
|
|
+ u8 tx_conf:2;
|
|
+
|
|
u16 prepend_size;
|
|
u16 tag_size;
|
|
u16 overhead_size;
|
|
@@ -104,7 +106,6 @@ struct tls_context {
|
|
|
|
u16 pending_open_record_frags;
|
|
int (*push_pending_record)(struct sock *sk, int flags);
|
|
- void (*free_resources)(struct sock *sk);
|
|
|
|
void (*sk_write_space)(struct sock *sk);
|
|
void (*sk_proto_close)(struct sock *sk, long timeout);
|
|
@@ -129,6 +130,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
|
|
int tls_sw_sendpage(struct sock *sk, struct page *page,
|
|
int offset, size_t size, int flags);
|
|
void tls_sw_close(struct sock *sk, long timeout);
|
|
+void tls_sw_free_tx_resources(struct sock *sk);
|
|
|
|
void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
|
|
void tls_icsk_clean_acked(struct sock *sk);
|
|
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
|
|
index 7115838fbf2a..38ab0e06259a 100644
|
|
--- a/include/uapi/linux/btrfs_tree.h
|
|
+++ b/include/uapi/linux/btrfs_tree.h
|
|
@@ -734,6 +734,7 @@ struct btrfs_balance_item {
|
|
#define BTRFS_FILE_EXTENT_INLINE 0
|
|
#define BTRFS_FILE_EXTENT_REG 1
|
|
#define BTRFS_FILE_EXTENT_PREALLOC 2
|
|
+#define BTRFS_FILE_EXTENT_TYPES 2
|
|
|
|
struct btrfs_file_extent_item {
|
|
/*
|
|
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
|
|
index 3027f943f4b3..214102fab940 100644
|
|
--- a/include/uapi/linux/prctl.h
|
|
+++ b/include/uapi/linux/prctl.h
|
|
@@ -203,6 +203,7 @@ struct prctl_mm_map {
|
|
#define PR_SET_SPECULATION_CTRL 53
|
|
/* Speculation control variants */
|
|
# define PR_SPEC_STORE_BYPASS 0
|
|
+# define PR_SPEC_INDIRECT_BRANCH 1
|
|
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
|
|
# define PR_SPEC_NOT_AFFECTED 0
|
|
# define PR_SPEC_PRCTL (1UL << 0)
|
|
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
|
|
index 013b0cd1958e..f6755fd5bae2 100644
|
|
--- a/kernel/bpf/verifier.c
|
|
+++ b/kernel/bpf/verifier.c
|
|
@@ -717,8 +717,9 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
|
|
/* check_stack_read/write functions track spill/fill of registers,
|
|
* stack boundary and alignment are checked in check_mem_access()
|
|
*/
|
|
-static int check_stack_write(struct bpf_verifier_state *state, int off,
|
|
- int size, int value_regno)
|
|
+static int check_stack_write(struct bpf_verifier_env *env,
|
|
+ struct bpf_verifier_state *state, int off,
|
|
+ int size, int value_regno, int insn_idx)
|
|
{
|
|
int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
|
|
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
|
|
@@ -738,8 +739,32 @@ static int check_stack_write(struct bpf_verifier_state *state, int off,
|
|
state->spilled_regs[spi] = state->regs[value_regno];
|
|
state->spilled_regs[spi].live |= REG_LIVE_WRITTEN;
|
|
|
|
- for (i = 0; i < BPF_REG_SIZE; i++)
|
|
+ for (i = 0; i < BPF_REG_SIZE; i++) {
|
|
+ if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
|
|
+ !env->allow_ptr_leaks) {
|
|
+ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
|
|
+ int soff = (-spi - 1) * BPF_REG_SIZE;
|
|
+
|
|
+ /* detected reuse of integer stack slot with a pointer
|
|
+ * which means either llvm is reusing stack slot or
|
|
+ * an attacker is trying to exploit CVE-2018-3639
|
|
+ * (speculative store bypass)
|
|
+ * Have to sanitize that slot with preemptive
|
|
+ * store of zero.
|
|
+ */
|
|
+ if (*poff && *poff != soff) {
|
|
+ /* disallow programs where single insn stores
|
|
+ * into two different stack slots, since verifier
|
|
+ * cannot sanitize them
|
|
+ */
|
|
+ verbose("insn %d cannot access two stack slots fp%d and fp%d",
|
|
+ insn_idx, *poff, soff);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ *poff = soff;
|
|
+ }
|
|
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
|
|
+ }
|
|
} else {
|
|
/* regular write of data into stack */
|
|
state->spilled_regs[spi] = (struct bpf_reg_state) {};
|
|
@@ -1216,7 +1241,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
|
|
verbose("attempt to corrupt spilled pointer on stack\n");
|
|
return -EACCES;
|
|
}
|
|
- err = check_stack_write(state, off, size, value_regno);
|
|
+ err = check_stack_write(env, state, off, size,
|
|
+ value_regno, insn_idx);
|
|
} else {
|
|
err = check_stack_read(state, off, size, value_regno);
|
|
}
|
|
@@ -4270,6 +4296,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
|
|
else
|
|
continue;
|
|
|
|
+ if (type == BPF_WRITE &&
|
|
+ env->insn_aux_data[i + delta].sanitize_stack_off) {
|
|
+ struct bpf_insn patch[] = {
|
|
+ /* Sanitize suspicious stack slot with zero.
|
|
+ * There are no memory dependencies for this store,
|
|
+ * since it's only using frame pointer and immediate
|
|
+ * constant of zero
|
|
+ */
|
|
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP,
|
|
+ env->insn_aux_data[i + delta].sanitize_stack_off,
|
|
+ 0),
|
|
+ /* the original STX instruction will immediately
|
|
+ * overwrite the same stack slot with appropriate value
|
|
+ */
|
|
+ *insn,
|
|
+ };
|
|
+
|
|
+ cnt = ARRAY_SIZE(patch);
|
|
+ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
|
|
+ if (!new_prog)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ delta += cnt - 1;
|
|
+ env->prog = new_prog;
|
|
+ insn = new_prog->insnsi + i + delta;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
|
|
continue;
|
|
|
|
diff --git a/kernel/cpu.c b/kernel/cpu.c
|
|
index f3f389e33343..5c907d96e3dd 100644
|
|
--- a/kernel/cpu.c
|
|
+++ b/kernel/cpu.c
|
|
@@ -10,6 +10,7 @@
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/sched/hotplug.h>
|
|
#include <linux/sched/task.h>
|
|
+#include <linux/sched/smt.h>
|
|
#include <linux/unistd.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/oom.h>
|
|
@@ -347,6 +348,12 @@ void cpu_hotplug_enable(void)
|
|
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
|
|
#endif /* CONFIG_HOTPLUG_CPU */
|
|
|
|
+/*
|
|
+ * Architectures that need SMT-specific errata handling during SMT hotplug
|
|
+ * should override this.
|
|
+ */
|
|
+void __weak arch_smt_update(void) { }
|
|
+
|
|
#ifdef CONFIG_HOTPLUG_SMT
|
|
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
|
|
EXPORT_SYMBOL_GPL(cpu_smt_control);
|
|
@@ -998,6 +1005,7 @@ out:
|
|
* concurrent CPU hotplug via cpu_add_remove_lock.
|
|
*/
|
|
lockup_detector_cleanup();
|
|
+ arch_smt_update();
|
|
return ret;
|
|
}
|
|
|
|
@@ -1126,6 +1134,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
|
|
ret = cpuhp_up_callbacks(cpu, st, target);
|
|
out:
|
|
cpus_write_unlock();
|
|
+ arch_smt_update();
|
|
return ret;
|
|
}
|
|
|
|
@@ -2071,8 +2080,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
|
|
*/
|
|
cpuhp_offline_cpu_device(cpu);
|
|
}
|
|
- if (!ret)
|
|
+ if (!ret) {
|
|
cpu_smt_control = ctrlval;
|
|
+ arch_smt_update();
|
|
+ }
|
|
cpu_maps_update_done();
|
|
return ret;
|
|
}
|
|
@@ -2083,6 +2094,7 @@ static int cpuhp_smt_enable(void)
|
|
|
|
cpu_maps_update_begin();
|
|
cpu_smt_control = CPU_SMT_ENABLED;
|
|
+ arch_smt_update();
|
|
for_each_present_cpu(cpu) {
|
|
/* Skip online CPUs and CPUs on offline nodes */
|
|
if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
|
|
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
|
|
index 7c3774ac1d51..70be35a19be2 100644
|
|
--- a/kernel/jump_label.c
|
|
+++ b/kernel/jump_label.c
|
|
@@ -79,7 +79,7 @@ int static_key_count(struct static_key *key)
|
|
}
|
|
EXPORT_SYMBOL_GPL(static_key_count);
|
|
|
|
-static void static_key_slow_inc_cpuslocked(struct static_key *key)
|
|
+void static_key_slow_inc_cpuslocked(struct static_key *key)
|
|
{
|
|
int v, v1;
|
|
|
|
@@ -180,7 +180,7 @@ void static_key_disable(struct static_key *key)
|
|
}
|
|
EXPORT_SYMBOL_GPL(static_key_disable);
|
|
|
|
-static void static_key_slow_dec_cpuslocked(struct static_key *key,
|
|
+static void __static_key_slow_dec_cpuslocked(struct static_key *key,
|
|
unsigned long rate_limit,
|
|
struct delayed_work *work)
|
|
{
|
|
@@ -211,7 +211,7 @@ static void __static_key_slow_dec(struct static_key *key,
|
|
struct delayed_work *work)
|
|
{
|
|
cpus_read_lock();
|
|
- static_key_slow_dec_cpuslocked(key, rate_limit, work);
|
|
+ __static_key_slow_dec_cpuslocked(key, rate_limit, work);
|
|
cpus_read_unlock();
|
|
}
|
|
|
|
@@ -229,6 +229,12 @@ void static_key_slow_dec(struct static_key *key)
|
|
}
|
|
EXPORT_SYMBOL_GPL(static_key_slow_dec);
|
|
|
|
+void static_key_slow_dec_cpuslocked(struct static_key *key)
|
|
+{
|
|
+ STATIC_KEY_CHECK_USE();
|
|
+ __static_key_slow_dec_cpuslocked(key, 0, NULL);
|
|
+}
|
|
+
|
|
void static_key_slow_dec_deferred(struct static_key_deferred *key)
|
|
{
|
|
STATIC_KEY_CHECK_USE();
|
|
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
|
index 3bc664662081..0552ddbb25e2 100644
|
|
--- a/kernel/sched/core.c
|
|
+++ b/kernel/sched/core.c
|
|
@@ -5617,15 +5617,10 @@ int sched_cpu_activate(unsigned int cpu)
|
|
|
|
#ifdef CONFIG_SCHED_SMT
|
|
/*
|
|
- * The sched_smt_present static key needs to be evaluated on every
|
|
- * hotplug event because at boot time SMT might be disabled when
|
|
- * the number of booted CPUs is limited.
|
|
- *
|
|
- * If then later a sibling gets hotplugged, then the key would stay
|
|
- * off and SMT scheduling would never be functional.
|
|
+ * When going up, increment the number of cores with SMT present.
|
|
*/
|
|
- if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
|
|
- static_branch_enable_cpuslocked(&sched_smt_present);
|
|
+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
|
|
+ static_branch_inc_cpuslocked(&sched_smt_present);
|
|
#endif
|
|
set_cpu_active(cpu, true);
|
|
|
|
@@ -5669,6 +5664,14 @@ int sched_cpu_deactivate(unsigned int cpu)
|
|
*/
|
|
synchronize_rcu_mult(call_rcu, call_rcu_sched);
|
|
|
|
+#ifdef CONFIG_SCHED_SMT
|
|
+ /*
|
|
+ * When going down, decrement the number of cores with SMT present.
|
|
+ */
|
|
+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
|
|
+ static_branch_dec_cpuslocked(&sched_smt_present);
|
|
+#endif
|
|
+
|
|
if (!sched_smp_initialized)
|
|
return 0;
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
|
index 2d4d79420e36..7240bb4a4090 100644
|
|
--- a/kernel/sched/fair.c
|
|
+++ b/kernel/sched/fair.c
|
|
@@ -4040,12 +4040,12 @@ static inline bool cfs_bandwidth_used(void)
|
|
|
|
void cfs_bandwidth_usage_inc(void)
|
|
{
|
|
- static_key_slow_inc(&__cfs_bandwidth_used);
|
|
+ static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used);
|
|
}
|
|
|
|
void cfs_bandwidth_usage_dec(void)
|
|
{
|
|
- static_key_slow_dec(&__cfs_bandwidth_used);
|
|
+ static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);
|
|
}
|
|
#else /* HAVE_JUMP_LABEL */
|
|
static bool cfs_bandwidth_used(void)
|
|
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
|
|
index 63d999dfec80..b3ba6e5e99f2 100644
|
|
--- a/kernel/sched/sched.h
|
|
+++ b/kernel/sched/sched.h
|
|
@@ -20,6 +20,7 @@
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/sched/cputime.h>
|
|
#include <linux/sched/init.h>
|
|
+#include <linux/sched/smt.h>
|
|
|
|
#include <linux/u64_stats_sync.h>
|
|
#include <linux/kernel_stat.h>
|
|
@@ -825,9 +826,6 @@ static inline int cpu_of(struct rq *rq)
|
|
|
|
|
|
#ifdef CONFIG_SCHED_SMT
|
|
-
|
|
-extern struct static_key_false sched_smt_present;
|
|
-
|
|
extern void __update_idle_core(struct rq *rq);
|
|
|
|
static inline void update_idle_core(struct rq *rq)
|
|
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
|
|
index 96c304fd656a..7abb59ce6613 100644
|
|
--- a/lib/test_kmod.c
|
|
+++ b/lib/test_kmod.c
|
|
@@ -1221,7 +1221,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev)
|
|
|
|
dev_info(test_dev->dev, "removing interface\n");
|
|
misc_deregister(&test_dev->misc_dev);
|
|
- kfree(&test_dev->misc_dev.name);
|
|
|
|
mutex_unlock(&test_dev->config_mutex);
|
|
mutex_unlock(&test_dev->trigger_mutex);
|
|
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
|
|
index adacfe66cf3d..930f2aa3bb4d 100644
|
|
--- a/mm/huge_memory.c
|
|
+++ b/mm/huge_memory.c
|
|
@@ -2280,7 +2280,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
|
|
}
|
|
}
|
|
|
|
-static void freeze_page(struct page *page)
|
|
+static void unmap_page(struct page *page)
|
|
{
|
|
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
|
|
TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
|
|
@@ -2295,7 +2295,7 @@ static void freeze_page(struct page *page)
|
|
VM_BUG_ON_PAGE(!unmap_success, page);
|
|
}
|
|
|
|
-static void unfreeze_page(struct page *page)
|
|
+static void remap_page(struct page *page)
|
|
{
|
|
int i;
|
|
if (PageTransHuge(page)) {
|
|
@@ -2312,26 +2312,13 @@ static void __split_huge_page_tail(struct page *head, int tail,
|
|
struct page *page_tail = head + tail;
|
|
|
|
VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
|
|
- VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);
|
|
|
|
/*
|
|
- * tail_page->_refcount is zero and not changing from under us. But
|
|
- * get_page_unless_zero() may be running from under us on the
|
|
- * tail_page. If we used atomic_set() below instead of atomic_inc() or
|
|
- * atomic_add(), we would then run atomic_set() concurrently with
|
|
- * get_page_unless_zero(), and atomic_set() is implemented in C not
|
|
- * using locked ops. spin_unlock on x86 sometime uses locked ops
|
|
- * because of PPro errata 66, 92, so unless somebody can guarantee
|
|
- * atomic_set() here would be safe on all archs (and not only on x86),
|
|
- * it's safer to use atomic_inc()/atomic_add().
|
|
+ * Clone page flags before unfreezing refcount.
|
|
+ *
|
|
+ * After successful get_page_unless_zero() might follow flags change,
|
|
+ * for exmaple lock_page() which set PG_waiters.
|
|
*/
|
|
- if (PageAnon(head) && !PageSwapCache(head)) {
|
|
- page_ref_inc(page_tail);
|
|
- } else {
|
|
- /* Additional pin to radix tree */
|
|
- page_ref_add(page_tail, 2);
|
|
- }
|
|
-
|
|
page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
|
|
page_tail->flags |= (head->flags &
|
|
((1L << PG_referenced) |
|
|
@@ -2344,36 +2331,42 @@ static void __split_huge_page_tail(struct page *head, int tail,
|
|
(1L << PG_unevictable) |
|
|
(1L << PG_dirty)));
|
|
|
|
- /*
|
|
- * After clearing PageTail the gup refcount can be released.
|
|
- * Page flags also must be visible before we make the page non-compound.
|
|
- */
|
|
+ /* ->mapping in first tail page is compound_mapcount */
|
|
+ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
|
|
+ page_tail);
|
|
+ page_tail->mapping = head->mapping;
|
|
+ page_tail->index = head->index + tail;
|
|
+
|
|
+ /* Page flags must be visible before we make the page non-compound. */
|
|
smp_wmb();
|
|
|
|
+ /*
|
|
+ * Clear PageTail before unfreezing page refcount.
|
|
+ *
|
|
+ * After successful get_page_unless_zero() might follow put_page()
|
|
+ * which needs correct compound_head().
|
|
+ */
|
|
clear_compound_head(page_tail);
|
|
|
|
+ /* Finally unfreeze refcount. Additional reference from page cache. */
|
|
+ page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) ||
|
|
+ PageSwapCache(head)));
|
|
+
|
|
if (page_is_young(head))
|
|
set_page_young(page_tail);
|
|
if (page_is_idle(head))
|
|
set_page_idle(page_tail);
|
|
|
|
- /* ->mapping in first tail page is compound_mapcount */
|
|
- VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
|
|
- page_tail);
|
|
- page_tail->mapping = head->mapping;
|
|
-
|
|
- page_tail->index = head->index + tail;
|
|
page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
|
|
lru_add_page_tail(head, page_tail, lruvec, list);
|
|
}
|
|
|
|
static void __split_huge_page(struct page *page, struct list_head *list,
|
|
- unsigned long flags)
|
|
+ pgoff_t end, unsigned long flags)
|
|
{
|
|
struct page *head = compound_head(page);
|
|
struct zone *zone = page_zone(head);
|
|
struct lruvec *lruvec;
|
|
- pgoff_t end = -1;
|
|
int i;
|
|
|
|
lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
|
|
@@ -2381,9 +2374,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
|
/* complete memcg works before add pages to LRU */
|
|
mem_cgroup_split_huge_fixup(head);
|
|
|
|
- if (!PageAnon(page))
|
|
- end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
|
|
-
|
|
for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
|
|
__split_huge_page_tail(head, i, lruvec, list);
|
|
/* Some pages can be beyond i_size: drop them from page cache */
|
|
@@ -2412,7 +2402,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
|
|
|
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
|
|
|
|
- unfreeze_page(head);
|
|
+ remap_page(head);
|
|
|
|
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
|
struct page *subpage = head + i;
|
|
@@ -2555,6 +2545,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
|
int count, mapcount, extra_pins, ret;
|
|
bool mlocked;
|
|
unsigned long flags;
|
|
+ pgoff_t end;
|
|
|
|
VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
|
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
|
@@ -2577,6 +2568,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
|
ret = -EBUSY;
|
|
goto out;
|
|
}
|
|
+ end = -1;
|
|
mapping = NULL;
|
|
anon_vma_lock_write(anon_vma);
|
|
} else {
|
|
@@ -2590,10 +2582,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
|
|
|
anon_vma = NULL;
|
|
i_mmap_lock_read(mapping);
|
|
+
|
|
+ /*
|
|
+ *__split_huge_page() may need to trim off pages beyond EOF:
|
|
+ * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
|
|
+ * which cannot be nested inside the page tree lock. So note
|
|
+ * end now: i_size itself may be changed at any moment, but
|
|
+ * head page lock is good enough to serialize the trimming.
|
|
+ */
|
|
+ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
|
|
}
|
|
|
|
/*
|
|
- * Racy check if we can split the page, before freeze_page() will
|
|
+ * Racy check if we can split the page, before unmap_page() will
|
|
* split PMDs
|
|
*/
|
|
if (!can_split_huge_page(head, &extra_pins)) {
|
|
@@ -2602,7 +2603,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
|
}
|
|
|
|
mlocked = PageMlocked(page);
|
|
- freeze_page(head);
|
|
+ unmap_page(head);
|
|
VM_BUG_ON_PAGE(compound_mapcount(head), head);
|
|
|
|
/* Make sure the page is not on per-CPU pagevec as it takes pin */
|
|
@@ -2639,7 +2640,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
|
if (mapping)
|
|
__dec_node_page_state(page, NR_SHMEM_THPS);
|
|
spin_unlock(&pgdata->split_queue_lock);
|
|
- __split_huge_page(page, list, flags);
|
|
+ __split_huge_page(page, list, end, flags);
|
|
if (PageSwapCache(head)) {
|
|
swp_entry_t entry = { .val = page_private(head) };
|
|
|
|
@@ -2659,7 +2660,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
|
fail: if (mapping)
|
|
spin_unlock(&mapping->tree_lock);
|
|
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
|
|
- unfreeze_page(head);
|
|
+ remap_page(head);
|
|
ret = -EBUSY;
|
|
}
|
|
|
|
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
|
|
index 0a5bb3e8a8a3..d27a73737f1a 100644
|
|
--- a/mm/khugepaged.c
|
|
+++ b/mm/khugepaged.c
|
|
@@ -1288,7 +1288,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
|
* collapse_shmem - collapse small tmpfs/shmem pages into huge one.
|
|
*
|
|
* Basic scheme is simple, details are more complex:
|
|
- * - allocate and freeze a new huge page;
|
|
+ * - allocate and lock a new huge page;
|
|
* - scan over radix tree replacing old pages the new one
|
|
* + swap in pages if necessary;
|
|
* + fill in gaps;
|
|
@@ -1296,11 +1296,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
|
* - if replacing succeed:
|
|
* + copy data over;
|
|
* + free old pages;
|
|
- * + unfreeze huge page;
|
|
+ * + unlock huge page;
|
|
* - if replacing failed;
|
|
* + put all pages back and unfreeze them;
|
|
* + restore gaps in the radix-tree;
|
|
- * + free huge page;
|
|
+ * + unlock and free huge page;
|
|
*/
|
|
static void collapse_shmem(struct mm_struct *mm,
|
|
struct address_space *mapping, pgoff_t start,
|
|
@@ -1333,18 +1333,15 @@ static void collapse_shmem(struct mm_struct *mm,
|
|
goto out;
|
|
}
|
|
|
|
+ __SetPageLocked(new_page);
|
|
+ __SetPageSwapBacked(new_page);
|
|
new_page->index = start;
|
|
new_page->mapping = mapping;
|
|
- __SetPageSwapBacked(new_page);
|
|
- __SetPageLocked(new_page);
|
|
- BUG_ON(!page_ref_freeze(new_page, 1));
|
|
-
|
|
|
|
/*
|
|
- * At this point the new_page is 'frozen' (page_count() is zero), locked
|
|
- * and not up-to-date. It's safe to insert it into radix tree, because
|
|
- * nobody would be able to map it or use it in other way until we
|
|
- * unfreeze it.
|
|
+ * At this point the new_page is locked and not up-to-date.
|
|
+ * It's safe to insert it into the page cache, because nobody would
|
|
+ * be able to map it or use it in another way until we unlock it.
|
|
*/
|
|
|
|
index = start;
|
|
@@ -1352,19 +1349,29 @@ static void collapse_shmem(struct mm_struct *mm,
|
|
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
|
|
int n = min(iter.index, end) - index;
|
|
|
|
+ /*
|
|
+ * Stop if extent has been hole-punched, and is now completely
|
|
+ * empty (the more obvious i_size_read() check would take an
|
|
+ * irq-unsafe seqlock on 32-bit).
|
|
+ */
|
|
+ if (n >= HPAGE_PMD_NR) {
|
|
+ result = SCAN_TRUNCATED;
|
|
+ goto tree_locked;
|
|
+ }
|
|
+
|
|
/*
|
|
* Handle holes in the radix tree: charge it from shmem and
|
|
* insert relevant subpage of new_page into the radix-tree.
|
|
*/
|
|
if (n && !shmem_charge(mapping->host, n)) {
|
|
result = SCAN_FAIL;
|
|
- break;
|
|
+ goto tree_locked;
|
|
}
|
|
- nr_none += n;
|
|
for (; index < min(iter.index, end); index++) {
|
|
radix_tree_insert(&mapping->page_tree, index,
|
|
new_page + (index % HPAGE_PMD_NR));
|
|
}
|
|
+ nr_none += n;
|
|
|
|
/* We are done. */
|
|
if (index >= end)
|
|
@@ -1380,12 +1387,12 @@ static void collapse_shmem(struct mm_struct *mm,
|
|
result = SCAN_FAIL;
|
|
goto tree_unlocked;
|
|
}
|
|
- spin_lock_irq(&mapping->tree_lock);
|
|
} else if (trylock_page(page)) {
|
|
get_page(page);
|
|
+ spin_unlock_irq(&mapping->tree_lock);
|
|
} else {
|
|
result = SCAN_PAGE_LOCK;
|
|
- break;
|
|
+ goto tree_locked;
|
|
}
|
|
|
|
/*
|
|
@@ -1394,17 +1401,24 @@ static void collapse_shmem(struct mm_struct *mm,
|
|
*/
|
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
|
VM_BUG_ON_PAGE(!PageUptodate(page), page);
|
|
- VM_BUG_ON_PAGE(PageTransCompound(page), page);
|
|
+
|
|
+ /*
|
|
+ * If file was truncated then extended, or hole-punched, before
|
|
+ * we locked the first page, then a THP might be there already.
|
|
+ */
|
|
+ if (PageTransCompound(page)) {
|
|
+ result = SCAN_PAGE_COMPOUND;
|
|
+ goto out_unlock;
|
|
+ }
|
|
|
|
if (page_mapping(page) != mapping) {
|
|
result = SCAN_TRUNCATED;
|
|
goto out_unlock;
|
|
}
|
|
- spin_unlock_irq(&mapping->tree_lock);
|
|
|
|
if (isolate_lru_page(page)) {
|
|
result = SCAN_DEL_PAGE_LRU;
|
|
- goto out_isolate_failed;
|
|
+ goto out_unlock;
|
|
}
|
|
|
|
if (page_mapped(page))
|
|
@@ -1426,7 +1440,9 @@ static void collapse_shmem(struct mm_struct *mm,
|
|
*/
|
|
if (!page_ref_freeze(page, 3)) {
|
|
result = SCAN_PAGE_COUNT;
|
|
- goto out_lru;
|
|
+ spin_unlock_irq(&mapping->tree_lock);
|
|
+ putback_lru_page(page);
|
|
+ goto out_unlock;
|
|
}
|
|
|
|
/*
|
|
@@ -1442,17 +1458,10 @@ static void collapse_shmem(struct mm_struct *mm,
|
|
slot = radix_tree_iter_resume(slot, &iter);
|
|
index++;
|
|
continue;
|
|
-out_lru:
|
|
- spin_unlock_irq(&mapping->tree_lock);
|
|
- putback_lru_page(page);
|
|
-out_isolate_failed:
|
|
- unlock_page(page);
|
|
- put_page(page);
|
|
- goto tree_unlocked;
|
|
out_unlock:
|
|
unlock_page(page);
|
|
put_page(page);
|
|
- break;
|
|
+ goto tree_unlocked;
|
|
}
|
|
|
|
/*
|
|
@@ -1460,14 +1469,18 @@ out_unlock:
|
|
* This code only triggers if there's nothing in radix tree
|
|
* beyond 'end'.
|
|
*/
|
|
- if (result == SCAN_SUCCEED && index < end) {
|
|
+ if (index < end) {
|
|
int n = end - index;
|
|
|
|
+ /* Stop if extent has been truncated, and is now empty */
|
|
+ if (n >= HPAGE_PMD_NR) {
|
|
+ result = SCAN_TRUNCATED;
|
|
+ goto tree_locked;
|
|
+ }
|
|
if (!shmem_charge(mapping->host, n)) {
|
|
result = SCAN_FAIL;
|
|
goto tree_locked;
|
|
}
|
|
-
|
|
for (; index < end; index++) {
|
|
radix_tree_insert(&mapping->page_tree, index,
|
|
new_page + (index % HPAGE_PMD_NR));
|
|
@@ -1475,57 +1488,62 @@ out_unlock:
|
|
nr_none += n;
|
|
}
|
|
|
|
+ __inc_node_page_state(new_page, NR_SHMEM_THPS);
|
|
+ if (nr_none) {
|
|
+ struct zone *zone = page_zone(new_page);
|
|
+
|
|
+ __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
|
|
+ __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
|
|
+ }
|
|
+
|
|
tree_locked:
|
|
spin_unlock_irq(&mapping->tree_lock);
|
|
tree_unlocked:
|
|
|
|
if (result == SCAN_SUCCEED) {
|
|
- unsigned long flags;
|
|
- struct zone *zone = page_zone(new_page);
|
|
-
|
|
/*
|
|
* Replacing old pages with new one has succeed, now we need to
|
|
* copy the content and free old pages.
|
|
*/
|
|
+ index = start;
|
|
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
|
|
+ while (index < page->index) {
|
|
+ clear_highpage(new_page + (index % HPAGE_PMD_NR));
|
|
+ index++;
|
|
+ }
|
|
copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
|
|
page);
|
|
list_del(&page->lru);
|
|
- unlock_page(page);
|
|
- page_ref_unfreeze(page, 1);
|
|
page->mapping = NULL;
|
|
+ page_ref_unfreeze(page, 1);
|
|
ClearPageActive(page);
|
|
ClearPageUnevictable(page);
|
|
+ unlock_page(page);
|
|
put_page(page);
|
|
+ index++;
|
|
}
|
|
-
|
|
- local_irq_save(flags);
|
|
- __inc_node_page_state(new_page, NR_SHMEM_THPS);
|
|
- if (nr_none) {
|
|
- __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
|
|
- __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
|
|
+ while (index < end) {
|
|
+ clear_highpage(new_page + (index % HPAGE_PMD_NR));
|
|
+ index++;
|
|
}
|
|
- local_irq_restore(flags);
|
|
|
|
- /*
|
|
- * Remove pte page tables, so we can re-faulti
|
|
- * the page as huge.
|
|
- */
|
|
- retract_page_tables(mapping, start);
|
|
-
|
|
- /* Everything is ready, let's unfreeze the new_page */
|
|
- set_page_dirty(new_page);
|
|
SetPageUptodate(new_page);
|
|
- page_ref_unfreeze(new_page, HPAGE_PMD_NR);
|
|
+ page_ref_add(new_page, HPAGE_PMD_NR - 1);
|
|
+ set_page_dirty(new_page);
|
|
mem_cgroup_commit_charge(new_page, memcg, false, true);
|
|
lru_cache_add_anon(new_page);
|
|
- unlock_page(new_page);
|
|
|
|
+ /*
|
|
+ * Remove pte page tables, so we can re-fault the page as huge.
|
|
+ */
|
|
+ retract_page_tables(mapping, start);
|
|
*hpage = NULL;
|
|
} else {
|
|
/* Something went wrong: rollback changes to the radix-tree */
|
|
- shmem_uncharge(mapping->host, nr_none);
|
|
spin_lock_irq(&mapping->tree_lock);
|
|
+ mapping->nrpages -= nr_none;
|
|
+ shmem_uncharge(mapping->host, nr_none);
|
|
+
|
|
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
|
|
start) {
|
|
if (iter.index >= end)
|
|
@@ -1551,19 +1569,18 @@ tree_unlocked:
|
|
slot, page);
|
|
slot = radix_tree_iter_resume(slot, &iter);
|
|
spin_unlock_irq(&mapping->tree_lock);
|
|
- putback_lru_page(page);
|
|
unlock_page(page);
|
|
+ putback_lru_page(page);
|
|
spin_lock_irq(&mapping->tree_lock);
|
|
}
|
|
VM_BUG_ON(nr_none);
|
|
spin_unlock_irq(&mapping->tree_lock);
|
|
|
|
- /* Unfreeze new_page, caller would take care about freeing it */
|
|
- page_ref_unfreeze(new_page, 1);
|
|
mem_cgroup_cancel_charge(new_page, memcg, true);
|
|
- unlock_page(new_page);
|
|
new_page->mapping = NULL;
|
|
}
|
|
+
|
|
+ unlock_page(new_page);
|
|
out:
|
|
VM_BUG_ON(!list_empty(&pagelist));
|
|
/* TODO: tracepoints */
|
|
diff --git a/mm/shmem.c b/mm/shmem.c
|
|
index fa08f56fd5e5..ab7ff0aeae2d 100644
|
|
--- a/mm/shmem.c
|
|
+++ b/mm/shmem.c
|
|
@@ -296,12 +296,14 @@ bool shmem_charge(struct inode *inode, long pages)
|
|
if (!shmem_inode_acct_block(inode, pages))
|
|
return false;
|
|
|
|
+ /* nrpages adjustment first, then shmem_recalc_inode() when balanced */
|
|
+ inode->i_mapping->nrpages += pages;
|
|
+
|
|
spin_lock_irqsave(&info->lock, flags);
|
|
info->alloced += pages;
|
|
inode->i_blocks += pages * BLOCKS_PER_PAGE;
|
|
shmem_recalc_inode(inode);
|
|
spin_unlock_irqrestore(&info->lock, flags);
|
|
- inode->i_mapping->nrpages += pages;
|
|
|
|
return true;
|
|
}
|
|
@@ -311,6 +313,8 @@ void shmem_uncharge(struct inode *inode, long pages)
|
|
struct shmem_inode_info *info = SHMEM_I(inode);
|
|
unsigned long flags;
|
|
|
|
+ /* nrpages adjustment done by __delete_from_page_cache() or caller */
|
|
+
|
|
spin_lock_irqsave(&info->lock, flags);
|
|
info->alloced -= pages;
|
|
inode->i_blocks -= pages * BLOCKS_PER_PAGE;
|
|
@@ -1528,11 +1532,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
|
|
{
|
|
struct page *oldpage, *newpage;
|
|
struct address_space *swap_mapping;
|
|
+ swp_entry_t entry;
|
|
pgoff_t swap_index;
|
|
int error;
|
|
|
|
oldpage = *pagep;
|
|
- swap_index = page_private(oldpage);
|
|
+ entry.val = page_private(oldpage);
|
|
+ swap_index = swp_offset(entry);
|
|
swap_mapping = page_mapping(oldpage);
|
|
|
|
/*
|
|
@@ -1551,7 +1557,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
|
|
__SetPageLocked(newpage);
|
|
__SetPageSwapBacked(newpage);
|
|
SetPageUptodate(newpage);
|
|
- set_page_private(newpage, swap_index);
|
|
+ set_page_private(newpage, entry.val);
|
|
SetPageSwapCache(newpage);
|
|
|
|
/*
|
|
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
|
|
index dbde2b3c3c15..fbeee068ea14 100644
|
|
--- a/net/ceph/auth.c
|
|
+++ b/net/ceph/auth.c
|
|
@@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
|
|
}
|
|
EXPORT_SYMBOL(ceph_auth_update_authorizer);
|
|
|
|
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
|
|
+ struct ceph_authorizer *a,
|
|
+ void *challenge_buf,
|
|
+ int challenge_buf_len)
|
|
+{
|
|
+ int ret = 0;
|
|
+
|
|
+ mutex_lock(&ac->mutex);
|
|
+ if (ac->ops && ac->ops->add_authorizer_challenge)
|
|
+ ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
|
|
+ challenge_buf_len);
|
|
+ mutex_unlock(&ac->mutex);
|
|
+ return ret;
|
|
+}
|
|
+EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
|
|
+
|
|
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
|
|
struct ceph_authorizer *a)
|
|
{
|
|
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
|
|
index 2f4a1baf5f52..2bf9d9f7ddf3 100644
|
|
--- a/net/ceph/auth_x.c
|
|
+++ b/net/ceph/auth_x.c
|
|
@@ -9,6 +9,7 @@
|
|
|
|
#include <linux/ceph/decode.h>
|
|
#include <linux/ceph/auth.h>
|
|
+#include <linux/ceph/ceph_features.h>
|
|
#include <linux/ceph/libceph.h>
|
|
#include <linux/ceph/messenger.h>
|
|
|
|
@@ -70,25 +71,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf,
|
|
return sizeof(u32) + ciphertext_len;
|
|
}
|
|
|
|
+static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p,
|
|
+ int ciphertext_len)
|
|
+{
|
|
+ struct ceph_x_encrypt_header *hdr = p;
|
|
+ int plaintext_len;
|
|
+ int ret;
|
|
+
|
|
+ ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len,
|
|
+ &plaintext_len);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) {
|
|
+ pr_err("%s bad magic\n", __func__);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ return plaintext_len - sizeof(*hdr);
|
|
+}
|
|
+
|
|
static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end)
|
|
{
|
|
- struct ceph_x_encrypt_header *hdr = *p + sizeof(u32);
|
|
- int ciphertext_len, plaintext_len;
|
|
+ int ciphertext_len;
|
|
int ret;
|
|
|
|
ceph_decode_32_safe(p, end, ciphertext_len, e_inval);
|
|
ceph_decode_need(p, end, ciphertext_len, e_inval);
|
|
|
|
- ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len,
|
|
- &plaintext_len);
|
|
- if (ret)
|
|
+ ret = __ceph_x_decrypt(secret, *p, ciphertext_len);
|
|
+ if (ret < 0)
|
|
return ret;
|
|
|
|
- if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC)
|
|
- return -EPERM;
|
|
-
|
|
*p += ciphertext_len;
|
|
- return plaintext_len - sizeof(struct ceph_x_encrypt_header);
|
|
+ return ret;
|
|
|
|
e_inval:
|
|
return -EINVAL;
|
|
@@ -275,6 +291,51 @@ bad:
|
|
return -EINVAL;
|
|
}
|
|
|
|
+/*
|
|
+ * Encode and encrypt the second part (ceph_x_authorize_b) of the
|
|
+ * authorizer. The first part (ceph_x_authorize_a) should already be
|
|
+ * encoded.
|
|
+ */
|
|
+static int encrypt_authorizer(struct ceph_x_authorizer *au,
|
|
+ u64 *server_challenge)
|
|
+{
|
|
+ struct ceph_x_authorize_a *msg_a;
|
|
+ struct ceph_x_authorize_b *msg_b;
|
|
+ void *p, *end;
|
|
+ int ret;
|
|
+
|
|
+ msg_a = au->buf->vec.iov_base;
|
|
+ WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id));
|
|
+ p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len);
|
|
+ end = au->buf->vec.iov_base + au->buf->vec.iov_len;
|
|
+
|
|
+ msg_b = p + ceph_x_encrypt_offset();
|
|
+ msg_b->struct_v = 2;
|
|
+ msg_b->nonce = cpu_to_le64(au->nonce);
|
|
+ if (server_challenge) {
|
|
+ msg_b->have_challenge = 1;
|
|
+ msg_b->server_challenge_plus_one =
|
|
+ cpu_to_le64(*server_challenge + 1);
|
|
+ } else {
|
|
+ msg_b->have_challenge = 0;
|
|
+ msg_b->server_challenge_plus_one = 0;
|
|
+ }
|
|
+
|
|
+ ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ p += ret;
|
|
+ if (server_challenge) {
|
|
+ WARN_ON(p != end);
|
|
+ } else {
|
|
+ WARN_ON(p > end);
|
|
+ au->buf->vec.iov_len = p - au->buf->vec.iov_base;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
|
|
{
|
|
ceph_crypto_key_destroy(&au->session_key);
|
|
@@ -291,7 +352,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
|
|
int maxlen;
|
|
struct ceph_x_authorize_a *msg_a;
|
|
struct ceph_x_authorize_b *msg_b;
|
|
- void *p, *end;
|
|
int ret;
|
|
int ticket_blob_len =
|
|
(th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
|
|
@@ -335,21 +395,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
|
|
dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
|
|
le64_to_cpu(msg_a->ticket_blob.secret_id));
|
|
|
|
- p = msg_a + 1;
|
|
- p += ticket_blob_len;
|
|
- end = au->buf->vec.iov_base + au->buf->vec.iov_len;
|
|
-
|
|
- msg_b = p + ceph_x_encrypt_offset();
|
|
- msg_b->struct_v = 1;
|
|
get_random_bytes(&au->nonce, sizeof(au->nonce));
|
|
- msg_b->nonce = cpu_to_le64(au->nonce);
|
|
- ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
|
|
- if (ret < 0)
|
|
+ ret = encrypt_authorizer(au, NULL);
|
|
+ if (ret) {
|
|
+ pr_err("failed to encrypt authorizer: %d", ret);
|
|
goto out_au;
|
|
+ }
|
|
|
|
- p += ret;
|
|
- WARN_ON(p > end);
|
|
- au->buf->vec.iov_len = p - au->buf->vec.iov_base;
|
|
dout(" built authorizer nonce %llx len %d\n", au->nonce,
|
|
(int)au->buf->vec.iov_len);
|
|
return 0;
|
|
@@ -626,6 +678,54 @@ static int ceph_x_update_authorizer(
|
|
return 0;
|
|
}
|
|
|
|
+static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
|
|
+ void *challenge_buf,
|
|
+ int challenge_buf_len,
|
|
+ u64 *server_challenge)
|
|
+{
|
|
+ struct ceph_x_authorize_challenge *ch =
|
|
+ challenge_buf + sizeof(struct ceph_x_encrypt_header);
|
|
+ int ret;
|
|
+
|
|
+ /* no leading len */
|
|
+ ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
|
|
+ challenge_buf_len);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+ if (ret < sizeof(*ch)) {
|
|
+ pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ *server_challenge = le64_to_cpu(ch->server_challenge);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
|
|
+ struct ceph_authorizer *a,
|
|
+ void *challenge_buf,
|
|
+ int challenge_buf_len)
|
|
+{
|
|
+ struct ceph_x_authorizer *au = (void *)a;
|
|
+ u64 server_challenge;
|
|
+ int ret;
|
|
+
|
|
+ ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
|
|
+ &server_challenge);
|
|
+ if (ret) {
|
|
+ pr_err("failed to decrypt authorize challenge: %d", ret);
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ ret = encrypt_authorizer(au, &server_challenge);
|
|
+ if (ret) {
|
|
+ pr_err("failed to encrypt authorizer w/ challenge: %d", ret);
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
|
|
struct ceph_authorizer *a)
|
|
{
|
|
@@ -637,8 +737,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
|
|
ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
|
|
if (ret < 0)
|
|
return ret;
|
|
- if (ret != sizeof(*reply))
|
|
- return -EPERM;
|
|
+ if (ret < sizeof(*reply)) {
|
|
+ pr_err("bad size %d for ceph_x_authorize_reply\n", ret);
|
|
+ return -EINVAL;
|
|
+ }
|
|
|
|
if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
|
|
ret = -EPERM;
|
|
@@ -704,26 +806,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
|
|
__le64 *psig)
|
|
{
|
|
void *enc_buf = au->enc_buf;
|
|
- struct {
|
|
- __le32 len;
|
|
- __le32 header_crc;
|
|
- __le32 front_crc;
|
|
- __le32 middle_crc;
|
|
- __le32 data_crc;
|
|
- } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
|
|
int ret;
|
|
|
|
- sigblock->len = cpu_to_le32(4*sizeof(u32));
|
|
- sigblock->header_crc = msg->hdr.crc;
|
|
- sigblock->front_crc = msg->footer.front_crc;
|
|
- sigblock->middle_crc = msg->footer.middle_crc;
|
|
- sigblock->data_crc = msg->footer.data_crc;
|
|
- ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
|
|
- sizeof(*sigblock));
|
|
- if (ret < 0)
|
|
- return ret;
|
|
+ if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) {
|
|
+ struct {
|
|
+ __le32 len;
|
|
+ __le32 header_crc;
|
|
+ __le32 front_crc;
|
|
+ __le32 middle_crc;
|
|
+ __le32 data_crc;
|
|
+ } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
|
|
+
|
|
+ sigblock->len = cpu_to_le32(4*sizeof(u32));
|
|
+ sigblock->header_crc = msg->hdr.crc;
|
|
+ sigblock->front_crc = msg->footer.front_crc;
|
|
+ sigblock->middle_crc = msg->footer.middle_crc;
|
|
+ sigblock->data_crc = msg->footer.data_crc;
|
|
+
|
|
+ ret = ceph_x_encrypt(&au->session_key, enc_buf,
|
|
+ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock));
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ *psig = *(__le64 *)(enc_buf + sizeof(u32));
|
|
+ } else {
|
|
+ struct {
|
|
+ __le32 header_crc;
|
|
+ __le32 front_crc;
|
|
+ __le32 front_len;
|
|
+ __le32 middle_crc;
|
|
+ __le32 middle_len;
|
|
+ __le32 data_crc;
|
|
+ __le32 data_len;
|
|
+ __le32 seq_lower_word;
|
|
+ } __packed *sigblock = enc_buf;
|
|
+ struct {
|
|
+ __le64 a, b, c, d;
|
|
+ } __packed *penc = enc_buf;
|
|
+ int ciphertext_len;
|
|
+
|
|
+ sigblock->header_crc = msg->hdr.crc;
|
|
+ sigblock->front_crc = msg->footer.front_crc;
|
|
+ sigblock->front_len = msg->hdr.front_len;
|
|
+ sigblock->middle_crc = msg->footer.middle_crc;
|
|
+ sigblock->middle_len = msg->hdr.middle_len;
|
|
+ sigblock->data_crc = msg->footer.data_crc;
|
|
+ sigblock->data_len = msg->hdr.data_len;
|
|
+ sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq;
|
|
+
|
|
+ /* no leading len, no ceph_x_encrypt_header */
|
|
+ ret = ceph_crypt(&au->session_key, true, enc_buf,
|
|
+ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock),
|
|
+ &ciphertext_len);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ *psig = penc->a ^ penc->b ^ penc->c ^ penc->d;
|
|
+ }
|
|
|
|
- *psig = *(__le64 *)(enc_buf + sizeof(u32));
|
|
return 0;
|
|
}
|
|
|
|
@@ -778,6 +918,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
|
|
.handle_reply = ceph_x_handle_reply,
|
|
.create_authorizer = ceph_x_create_authorizer,
|
|
.update_authorizer = ceph_x_update_authorizer,
|
|
+ .add_authorizer_challenge = ceph_x_add_authorizer_challenge,
|
|
.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
|
|
.invalidate_authorizer = ceph_x_invalidate_authorizer,
|
|
.reset = ceph_x_reset,
|
|
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
|
|
index 32c13d763b9a..24b0b74564d0 100644
|
|
--- a/net/ceph/auth_x_protocol.h
|
|
+++ b/net/ceph/auth_x_protocol.h
|
|
@@ -70,6 +70,13 @@ struct ceph_x_authorize_a {
|
|
struct ceph_x_authorize_b {
|
|
__u8 struct_v;
|
|
__le64 nonce;
|
|
+ __u8 have_challenge;
|
|
+ __le64 server_challenge_plus_one;
|
|
+} __attribute__ ((packed));
|
|
+
|
|
+struct ceph_x_authorize_challenge {
|
|
+ __u8 struct_v;
|
|
+ __le64 server_challenge;
|
|
} __attribute__ ((packed));
|
|
|
|
struct ceph_x_authorize_reply {
|
|
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
|
|
index 5281da82371a..f864807284d4 100644
|
|
--- a/net/ceph/messenger.c
|
|
+++ b/net/ceph/messenger.c
|
|
@@ -1411,24 +1411,26 @@ static void prepare_write_keepalive(struct ceph_connection *con)
|
|
* Connection negotiation.
|
|
*/
|
|
|
|
-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
|
|
- int *auth_proto)
|
|
+static int get_connect_authorizer(struct ceph_connection *con)
|
|
{
|
|
struct ceph_auth_handshake *auth;
|
|
+ int auth_proto;
|
|
|
|
if (!con->ops->get_authorizer) {
|
|
+ con->auth = NULL;
|
|
con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
|
|
con->out_connect.authorizer_len = 0;
|
|
- return NULL;
|
|
+ return 0;
|
|
}
|
|
|
|
- auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
|
|
+ auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
|
|
if (IS_ERR(auth))
|
|
- return auth;
|
|
+ return PTR_ERR(auth);
|
|
|
|
- con->auth_reply_buf = auth->authorizer_reply_buf;
|
|
- con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
|
|
- return auth;
|
|
+ con->auth = auth;
|
|
+ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
|
|
+ con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
|
|
+ return 0;
|
|
}
|
|
|
|
/*
|
|
@@ -1444,12 +1446,22 @@ static void prepare_write_banner(struct ceph_connection *con)
|
|
con_flag_set(con, CON_FLAG_WRITE_PENDING);
|
|
}
|
|
|
|
+static void __prepare_write_connect(struct ceph_connection *con)
|
|
+{
|
|
+ con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect);
|
|
+ if (con->auth)
|
|
+ con_out_kvec_add(con, con->auth->authorizer_buf_len,
|
|
+ con->auth->authorizer_buf);
|
|
+
|
|
+ con->out_more = 0;
|
|
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
|
|
+}
|
|
+
|
|
static int prepare_write_connect(struct ceph_connection *con)
|
|
{
|
|
unsigned int global_seq = get_global_seq(con->msgr, 0);
|
|
int proto;
|
|
- int auth_proto;
|
|
- struct ceph_auth_handshake *auth;
|
|
+ int ret;
|
|
|
|
switch (con->peer_name.type) {
|
|
case CEPH_ENTITY_TYPE_MON:
|
|
@@ -1476,24 +1488,11 @@ static int prepare_write_connect(struct ceph_connection *con)
|
|
con->out_connect.protocol_version = cpu_to_le32(proto);
|
|
con->out_connect.flags = 0;
|
|
|
|
- auth_proto = CEPH_AUTH_UNKNOWN;
|
|
- auth = get_connect_authorizer(con, &auth_proto);
|
|
- if (IS_ERR(auth))
|
|
- return PTR_ERR(auth);
|
|
-
|
|
- con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
|
|
- con->out_connect.authorizer_len = auth ?
|
|
- cpu_to_le32(auth->authorizer_buf_len) : 0;
|
|
-
|
|
- con_out_kvec_add(con, sizeof (con->out_connect),
|
|
- &con->out_connect);
|
|
- if (auth && auth->authorizer_buf_len)
|
|
- con_out_kvec_add(con, auth->authorizer_buf_len,
|
|
- auth->authorizer_buf);
|
|
-
|
|
- con->out_more = 0;
|
|
- con_flag_set(con, CON_FLAG_WRITE_PENDING);
|
|
+ ret = get_connect_authorizer(con);
|
|
+ if (ret)
|
|
+ return ret;
|
|
|
|
+ __prepare_write_connect(con);
|
|
return 0;
|
|
}
|
|
|
|
@@ -1753,11 +1752,21 @@ static int read_partial_connect(struct ceph_connection *con)
|
|
if (ret <= 0)
|
|
goto out;
|
|
|
|
- size = le32_to_cpu(con->in_reply.authorizer_len);
|
|
- end += size;
|
|
- ret = read_partial(con, end, size, con->auth_reply_buf);
|
|
- if (ret <= 0)
|
|
- goto out;
|
|
+ if (con->auth) {
|
|
+ size = le32_to_cpu(con->in_reply.authorizer_len);
|
|
+ if (size > con->auth->authorizer_reply_buf_len) {
|
|
+ pr_err("authorizer reply too big: %d > %zu\n", size,
|
|
+ con->auth->authorizer_reply_buf_len);
|
|
+ ret = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ end += size;
|
|
+ ret = read_partial(con, end, size,
|
|
+ con->auth->authorizer_reply_buf);
|
|
+ if (ret <= 0)
|
|
+ goto out;
|
|
+ }
|
|
|
|
dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
|
|
con, (int)con->in_reply.tag,
|
|
@@ -1765,7 +1774,6 @@ static int read_partial_connect(struct ceph_connection *con)
|
|
le32_to_cpu(con->in_reply.global_seq));
|
|
out:
|
|
return ret;
|
|
-
|
|
}
|
|
|
|
/*
|
|
@@ -2048,12 +2056,27 @@ static int process_connect(struct ceph_connection *con)
|
|
|
|
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
|
|
|
|
- if (con->auth_reply_buf) {
|
|
+ if (con->auth) {
|
|
/*
|
|
* Any connection that defines ->get_authorizer()
|
|
- * should also define ->verify_authorizer_reply().
|
|
+ * should also define ->add_authorizer_challenge() and
|
|
+ * ->verify_authorizer_reply().
|
|
+ *
|
|
* See get_connect_authorizer().
|
|
*/
|
|
+ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
|
|
+ ret = con->ops->add_authorizer_challenge(
|
|
+ con, con->auth->authorizer_reply_buf,
|
|
+ le32_to_cpu(con->in_reply.authorizer_len));
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ con_out_kvec_reset(con);
|
|
+ __prepare_write_connect(con);
|
|
+ prepare_read_connect(con);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
ret = con->ops->verify_authorizer_reply(con);
|
|
if (ret < 0) {
|
|
con->error_msg = "bad authorize reply";
|
|
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
|
|
index 2814dba5902d..53ea2d48896c 100644
|
|
--- a/net/ceph/osd_client.c
|
|
+++ b/net/ceph/osd_client.c
|
|
@@ -5292,6 +5292,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
|
|
return auth;
|
|
}
|
|
|
|
+static int add_authorizer_challenge(struct ceph_connection *con,
|
|
+ void *challenge_buf, int challenge_buf_len)
|
|
+{
|
|
+ struct ceph_osd *o = con->private;
|
|
+ struct ceph_osd_client *osdc = o->o_osdc;
|
|
+ struct ceph_auth_client *ac = osdc->client->monc.auth;
|
|
+
|
|
+ return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer,
|
|
+ challenge_buf, challenge_buf_len);
|
|
+}
|
|
|
|
static int verify_authorizer_reply(struct ceph_connection *con)
|
|
{
|
|
@@ -5341,6 +5351,7 @@ static const struct ceph_connection_operations osd_con_ops = {
|
|
.put = put_osd_con,
|
|
.dispatch = dispatch,
|
|
.get_authorizer = get_authorizer,
|
|
+ .add_authorizer_challenge = add_authorizer_challenge,
|
|
.verify_authorizer_reply = verify_authorizer_reply,
|
|
.invalidate_authorizer = invalidate_authorizer,
|
|
.alloc_msg = alloc_msg,
|
|
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
|
|
index c19a118f9f82..4067fa3fcbb2 100644
|
|
--- a/net/core/skbuff.c
|
|
+++ b/net/core/skbuff.c
|
|
@@ -4882,6 +4882,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
|
|
nf_reset(skb);
|
|
nf_reset_trace(skb);
|
|
|
|
+#ifdef CONFIG_NET_SWITCHDEV
|
|
+ skb->offload_fwd_mark = 0;
|
|
+#endif
|
|
+
|
|
if (!xnet)
|
|
return;
|
|
|
|
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
|
|
index 8d1a7c900393..88d5b2645bb0 100644
|
|
--- a/net/packet/af_packet.c
|
|
+++ b/net/packet/af_packet.c
|
|
@@ -2433,7 +2433,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
|
|
void *ph;
|
|
__u32 ts;
|
|
|
|
- ph = skb_shinfo(skb)->destructor_arg;
|
|
+ ph = skb_zcopy_get_nouarg(skb);
|
|
packet_dec_pending(&po->tx_ring);
|
|
|
|
ts = __packet_set_timestamp(po, ph, skb);
|
|
@@ -2499,7 +2499,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
|
|
skb->priority = po->sk.sk_priority;
|
|
skb->mark = po->sk.sk_mark;
|
|
sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
|
|
- skb_shinfo(skb)->destructor_arg = ph.raw;
|
|
+ skb_zcopy_set_nouarg(skb, ph.raw);
|
|
|
|
skb_reserve(skb, hlen);
|
|
skb_reset_network_header(skb);
|
|
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
|
|
index 4f2971f528db..e903bdd39b9f 100644
|
|
--- a/net/tls/tls_main.c
|
|
+++ b/net/tls/tls_main.c
|
|
@@ -46,8 +46,28 @@ MODULE_DESCRIPTION("Transport Layer Security Support");
|
|
MODULE_LICENSE("Dual BSD/GPL");
|
|
MODULE_ALIAS_TCP_ULP("tls");
|
|
|
|
-static struct proto tls_base_prot;
|
|
-static struct proto tls_sw_prot;
|
|
+enum {
|
|
+ TLSV4,
|
|
+ TLSV6,
|
|
+ TLS_NUM_PROTS,
|
|
+};
|
|
+
|
|
+enum {
|
|
+ TLS_BASE_TX,
|
|
+ TLS_SW_TX,
|
|
+ TLS_NUM_CONFIG,
|
|
+};
|
|
+
|
|
+static struct proto *saved_tcpv6_prot;
|
|
+static DEFINE_MUTEX(tcpv6_prot_mutex);
|
|
+static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
|
|
+
|
|
+static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
|
|
+{
|
|
+ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
|
|
+
|
|
+ sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf];
|
|
+}
|
|
|
|
int wait_on_pending_writer(struct sock *sk, long *timeo)
|
|
{
|
|
@@ -239,6 +259,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
|
|
void (*sk_proto_close)(struct sock *sk, long timeout);
|
|
|
|
lock_sock(sk);
|
|
+ sk_proto_close = ctx->sk_proto_close;
|
|
+
|
|
+ if (ctx->tx_conf == TLS_BASE_TX) {
|
|
+ tls_ctx_free(ctx);
|
|
+ goto skip_tx_cleanup;
|
|
+ }
|
|
|
|
if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
|
|
tls_handle_open_record(sk, 0);
|
|
@@ -255,13 +281,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
|
|
sg++;
|
|
}
|
|
}
|
|
- ctx->free_resources(sk);
|
|
+
|
|
kfree(ctx->rec_seq);
|
|
kfree(ctx->iv);
|
|
|
|
- sk_proto_close = ctx->sk_proto_close;
|
|
- tls_ctx_free(ctx);
|
|
+ if (ctx->tx_conf == TLS_SW_TX) {
|
|
+ tls_sw_free_tx_resources(sk);
|
|
+ tls_ctx_free(ctx);
|
|
+ }
|
|
|
|
+skip_tx_cleanup:
|
|
release_sock(sk);
|
|
sk_proto_close(sk, timeout);
|
|
}
|
|
@@ -362,48 +391,43 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
|
|
static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
|
|
unsigned int optlen)
|
|
{
|
|
- struct tls_crypto_info *crypto_info, tmp_crypto_info;
|
|
+ struct tls_crypto_info *crypto_info;
|
|
struct tls_context *ctx = tls_get_ctx(sk);
|
|
- struct proto *prot = NULL;
|
|
int rc = 0;
|
|
+ int tx_conf;
|
|
|
|
if (!optval || (optlen < sizeof(*crypto_info))) {
|
|
rc = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
- rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info));
|
|
+ crypto_info = &ctx->crypto_send.info;
|
|
+ /* Currently we don't support set crypto info more than one time */
|
|
+ if (TLS_CRYPTO_INFO_READY(crypto_info)) {
|
|
+ rc = -EBUSY;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
|
|
if (rc) {
|
|
rc = -EFAULT;
|
|
goto out;
|
|
}
|
|
|
|
/* check version */
|
|
- if (tmp_crypto_info.version != TLS_1_2_VERSION) {
|
|
+ if (crypto_info->version != TLS_1_2_VERSION) {
|
|
rc = -ENOTSUPP;
|
|
- goto out;
|
|
- }
|
|
-
|
|
- /* get user crypto info */
|
|
- crypto_info = &ctx->crypto_send.info;
|
|
-
|
|
- /* Currently we don't support set crypto info more than one time */
|
|
- if (TLS_CRYPTO_INFO_READY(crypto_info)) {
|
|
- rc = -EBUSY;
|
|
- goto out;
|
|
+ goto err_crypto_info;
|
|
}
|
|
|
|
- switch (tmp_crypto_info.cipher_type) {
|
|
+ switch (crypto_info->cipher_type) {
|
|
case TLS_CIPHER_AES_GCM_128: {
|
|
if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
|
|
rc = -EINVAL;
|
|
goto err_crypto_info;
|
|
}
|
|
- rc = copy_from_user(
|
|
- crypto_info,
|
|
- optval,
|
|
- sizeof(struct tls12_crypto_info_aes_gcm_128));
|
|
-
|
|
+ rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
|
|
+ optlen - sizeof(*crypto_info));
|
|
if (rc) {
|
|
rc = -EFAULT;
|
|
goto err_crypto_info;
|
|
@@ -415,18 +439,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
|
|
goto err_crypto_info;
|
|
}
|
|
|
|
- ctx->sk_write_space = sk->sk_write_space;
|
|
- sk->sk_write_space = tls_write_space;
|
|
-
|
|
- ctx->sk_proto_close = sk->sk_prot->close;
|
|
-
|
|
/* currently SW is default, we will have ethtool in future */
|
|
rc = tls_set_sw_offload(sk, ctx);
|
|
- prot = &tls_sw_prot;
|
|
+ tx_conf = TLS_SW_TX;
|
|
if (rc)
|
|
goto err_crypto_info;
|
|
|
|
- sk->sk_prot = prot;
|
|
+ ctx->tx_conf = tx_conf;
|
|
+ update_sk_prot(sk, ctx);
|
|
+ ctx->sk_write_space = sk->sk_write_space;
|
|
+ sk->sk_write_space = tls_write_space;
|
|
goto out;
|
|
|
|
err_crypto_info:
|
|
@@ -464,8 +486,21 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
|
|
return do_tls_setsockopt(sk, optname, optval, optlen);
|
|
}
|
|
|
|
+static void build_protos(struct proto *prot, struct proto *base)
|
|
+{
|
|
+ prot[TLS_BASE_TX] = *base;
|
|
+ prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
|
|
+ prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
|
|
+ prot[TLS_BASE_TX].close = tls_sk_proto_close;
|
|
+
|
|
+ prot[TLS_SW_TX] = prot[TLS_BASE_TX];
|
|
+ prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
|
|
+ prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
|
|
+}
|
|
+
|
|
static int tls_init(struct sock *sk)
|
|
{
|
|
+ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
|
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
|
struct tls_context *ctx;
|
|
int rc = 0;
|
|
@@ -488,7 +523,21 @@ static int tls_init(struct sock *sk)
|
|
icsk->icsk_ulp_data = ctx;
|
|
ctx->setsockopt = sk->sk_prot->setsockopt;
|
|
ctx->getsockopt = sk->sk_prot->getsockopt;
|
|
- sk->sk_prot = &tls_base_prot;
|
|
+ ctx->sk_proto_close = sk->sk_prot->close;
|
|
+
|
|
+ /* Build IPv6 TLS whenever the address of tcpv6_prot changes */
|
|
+ if (ip_ver == TLSV6 &&
|
|
+ unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
|
|
+ mutex_lock(&tcpv6_prot_mutex);
|
|
+ if (likely(sk->sk_prot != saved_tcpv6_prot)) {
|
|
+ build_protos(tls_prots[TLSV6], sk->sk_prot);
|
|
+ smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
|
|
+ }
|
|
+ mutex_unlock(&tcpv6_prot_mutex);
|
|
+ }
|
|
+
|
|
+ ctx->tx_conf = TLS_BASE_TX;
|
|
+ update_sk_prot(sk, ctx);
|
|
out:
|
|
return rc;
|
|
}
|
|
@@ -501,14 +550,7 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
|
|
|
|
static int __init tls_register(void)
|
|
{
|
|
- tls_base_prot = tcp_prot;
|
|
- tls_base_prot.setsockopt = tls_setsockopt;
|
|
- tls_base_prot.getsockopt = tls_getsockopt;
|
|
-
|
|
- tls_sw_prot = tls_base_prot;
|
|
- tls_sw_prot.sendmsg = tls_sw_sendmsg;
|
|
- tls_sw_prot.sendpage = tls_sw_sendpage;
|
|
- tls_sw_prot.close = tls_sk_proto_close;
|
|
+ build_protos(tls_prots[TLSV4], &tcp_prot);
|
|
|
|
tcp_register_ulp(&tcp_tls_ulp_ops);
|
|
|
|
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
|
|
index 6ae9ca567d6c..d18d4a478e4f 100644
|
|
--- a/net/tls/tls_sw.c
|
|
+++ b/net/tls/tls_sw.c
|
|
@@ -388,7 +388,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
|
|
{
|
|
struct tls_context *tls_ctx = tls_get_ctx(sk);
|
|
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
|
|
- int ret = 0;
|
|
+ int ret;
|
|
int required_size;
|
|
long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
|
|
bool eor = !(msg->msg_flags & MSG_MORE);
|
|
@@ -403,7 +403,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
|
|
|
|
lock_sock(sk);
|
|
|
|
- if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo))
|
|
+ ret = tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo);
|
|
+ if (ret)
|
|
goto send_end;
|
|
|
|
if (unlikely(msg->msg_controllen)) {
|
|
@@ -539,7 +540,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
|
|
{
|
|
struct tls_context *tls_ctx = tls_get_ctx(sk);
|
|
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
|
|
- int ret = 0;
|
|
+ int ret;
|
|
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
|
|
bool eor;
|
|
size_t orig_size = size;
|
|
@@ -559,7 +560,8 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
|
|
|
|
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
|
|
|
- if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo))
|
|
+ ret = tls_complete_pending_work(sk, tls_ctx, flags, &timeo);
|
|
+ if (ret)
|
|
goto sendpage_end;
|
|
|
|
/* Call the sk_stream functions to manage the sndbuf mem. */
|
|
@@ -646,7 +648,7 @@ sendpage_end:
|
|
return ret;
|
|
}
|
|
|
|
-static void tls_sw_free_resources(struct sock *sk)
|
|
+void tls_sw_free_tx_resources(struct sock *sk)
|
|
{
|
|
struct tls_context *tls_ctx = tls_get_ctx(sk);
|
|
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
|
|
@@ -685,7 +687,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
|
|
}
|
|
|
|
ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
|
|
- ctx->free_resources = tls_sw_free_resources;
|
|
|
|
crypto_info = &ctx->crypto_send.info;
|
|
switch (crypto_info->cipher_type) {
|
|
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
|
|
index 7143da06d702..be9e5deb58ba 100644
|
|
--- a/scripts/Makefile.build
|
|
+++ b/scripts/Makefile.build
|
|
@@ -272,10 +272,8 @@ else
|
|
objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
|
|
endif
|
|
ifdef CONFIG_RETPOLINE
|
|
-ifneq ($(RETPOLINE_CFLAGS),)
|
|
objtool_args += --retpoline
|
|
endif
|
|
-endif
|
|
|
|
|
|
ifdef CONFIG_MODVERSIONS
|
|
diff --git a/sound/core/control.c b/sound/core/control.c
|
|
index af7e6165e21e..36571cd49be3 100644
|
|
--- a/sound/core/control.c
|
|
+++ b/sound/core/control.c
|
|
@@ -347,6 +347,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count)
|
|
return 0;
|
|
}
|
|
|
|
+/* add a new kcontrol object; call with card->controls_rwsem locked */
|
|
+static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
|
|
+{
|
|
+ struct snd_ctl_elem_id id;
|
|
+ unsigned int idx;
|
|
+ unsigned int count;
|
|
+
|
|
+ id = kcontrol->id;
|
|
+ if (id.index > UINT_MAX - kcontrol->count)
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (snd_ctl_find_id(card, &id)) {
|
|
+ dev_err(card->dev,
|
|
+ "control %i:%i:%i:%s:%i is already present\n",
|
|
+ id.iface, id.device, id.subdevice, id.name, id.index);
|
|
+ return -EBUSY;
|
|
+ }
|
|
+
|
|
+ if (snd_ctl_find_hole(card, kcontrol->count) < 0)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ list_add_tail(&kcontrol->list, &card->controls);
|
|
+ card->controls_count += kcontrol->count;
|
|
+ kcontrol->id.numid = card->last_numid + 1;
|
|
+ card->last_numid += kcontrol->count;
|
|
+
|
|
+ id = kcontrol->id;
|
|
+ count = kcontrol->count;
|
|
+ for (idx = 0; idx < count; idx++, id.index++, id.numid++)
|
|
+ snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
/**
|
|
* snd_ctl_add - add the control instance to the card
|
|
* @card: the card instance
|
|
@@ -363,45 +397,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count)
|
|
*/
|
|
int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
|
|
{
|
|
- struct snd_ctl_elem_id id;
|
|
- unsigned int idx;
|
|
- unsigned int count;
|
|
int err = -EINVAL;
|
|
|
|
if (! kcontrol)
|
|
return err;
|
|
if (snd_BUG_ON(!card || !kcontrol->info))
|
|
goto error;
|
|
- id = kcontrol->id;
|
|
- if (id.index > UINT_MAX - kcontrol->count)
|
|
- goto error;
|
|
|
|
down_write(&card->controls_rwsem);
|
|
- if (snd_ctl_find_id(card, &id)) {
|
|
- up_write(&card->controls_rwsem);
|
|
- dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n",
|
|
- id.iface,
|
|
- id.device,
|
|
- id.subdevice,
|
|
- id.name,
|
|
- id.index);
|
|
- err = -EBUSY;
|
|
- goto error;
|
|
- }
|
|
- if (snd_ctl_find_hole(card, kcontrol->count) < 0) {
|
|
- up_write(&card->controls_rwsem);
|
|
- err = -ENOMEM;
|
|
- goto error;
|
|
- }
|
|
- list_add_tail(&kcontrol->list, &card->controls);
|
|
- card->controls_count += kcontrol->count;
|
|
- kcontrol->id.numid = card->last_numid + 1;
|
|
- card->last_numid += kcontrol->count;
|
|
- id = kcontrol->id;
|
|
- count = kcontrol->count;
|
|
+ err = __snd_ctl_add(card, kcontrol);
|
|
up_write(&card->controls_rwsem);
|
|
- for (idx = 0; idx < count; idx++, id.index++, id.numid++)
|
|
- snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id);
|
|
+ if (err < 0)
|
|
+ goto error;
|
|
return 0;
|
|
|
|
error:
|
|
@@ -1360,9 +1367,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
|
|
kctl->tlv.c = snd_ctl_elem_user_tlv;
|
|
|
|
/* This function manage to free the instance on failure. */
|
|
- err = snd_ctl_add(card, kctl);
|
|
- if (err < 0)
|
|
- return err;
|
|
+ down_write(&card->controls_rwsem);
|
|
+ err = __snd_ctl_add(card, kctl);
|
|
+ if (err < 0) {
|
|
+ snd_ctl_free_one(kctl);
|
|
+ goto unlock;
|
|
+ }
|
|
offset = snd_ctl_get_ioff(kctl, &info->id);
|
|
snd_ctl_build_ioff(&info->id, kctl, offset);
|
|
/*
|
|
@@ -1373,10 +1383,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
|
|
* which locks the element.
|
|
*/
|
|
|
|
- down_write(&card->controls_rwsem);
|
|
card->user_ctl_count++;
|
|
- up_write(&card->controls_rwsem);
|
|
|
|
+ unlock:
|
|
+ up_write(&card->controls_rwsem);
|
|
return 0;
|
|
}
|
|
|
|
diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c
|
|
index 8a852042a066..91cd305cabd7 100644
|
|
--- a/sound/isa/wss/wss_lib.c
|
|
+++ b/sound/isa/wss/wss_lib.c
|
|
@@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream)
|
|
if (err < 0) {
|
|
if (chip->release_dma)
|
|
chip->release_dma(chip, chip->dma_private_data, chip->dma1);
|
|
- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
|
|
return err;
|
|
}
|
|
chip->playback_substream = substream;
|
|
@@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream)
|
|
if (err < 0) {
|
|
if (chip->release_dma)
|
|
chip->release_dma(chip, chip->dma_private_data, chip->dma2);
|
|
- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
|
|
return err;
|
|
}
|
|
chip->capture_substream = substream;
|
|
diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
|
|
index 1ef7cdf1d3e8..38f355ae1863 100644
|
|
--- a/sound/pci/ac97/ac97_codec.c
|
|
+++ b/sound/pci/ac97/ac97_codec.c
|
|
@@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_
|
|
{
|
|
struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol);
|
|
int reg = kcontrol->private_value & 0xff;
|
|
- int shift = (kcontrol->private_value >> 8) & 0xff;
|
|
+ int shift = (kcontrol->private_value >> 8) & 0x0f;
|
|
int mask = (kcontrol->private_value >> 16) & 0xff;
|
|
// int invert = (kcontrol->private_value >> 24) & 0xff;
|
|
unsigned short value, old, new;
|
|
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
|
|
index eb8807de3ebc..66b0a124beae 100644
|
|
--- a/sound/pci/hda/patch_realtek.c
|
|
+++ b/sound/pci/hda/patch_realtek.c
|
|
@@ -343,6 +343,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
|
|
case 0x10ec0285:
|
|
case 0x10ec0298:
|
|
case 0x10ec0289:
|
|
+ case 0x10ec0300:
|
|
alc_update_coef_idx(codec, 0x10, 1<<9, 0);
|
|
break;
|
|
case 0x10ec0275:
|
|
@@ -2758,6 +2759,7 @@ enum {
|
|
ALC269_TYPE_ALC215,
|
|
ALC269_TYPE_ALC225,
|
|
ALC269_TYPE_ALC294,
|
|
+ ALC269_TYPE_ALC300,
|
|
ALC269_TYPE_ALC700,
|
|
};
|
|
|
|
@@ -2792,6 +2794,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
|
|
case ALC269_TYPE_ALC215:
|
|
case ALC269_TYPE_ALC225:
|
|
case ALC269_TYPE_ALC294:
|
|
+ case ALC269_TYPE_ALC300:
|
|
case ALC269_TYPE_ALC700:
|
|
ssids = alc269_ssids;
|
|
break;
|
|
@@ -6408,6 +6411,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
|
|
SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8),
|
|
SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
|
|
SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
|
|
+ SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
|
|
SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
|
|
SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
|
|
SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
|
|
@@ -7089,6 +7093,10 @@ static int patch_alc269(struct hda_codec *codec)
|
|
spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */
|
|
alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */
|
|
break;
|
|
+ case 0x10ec0300:
|
|
+ spec->codec_variant = ALC269_TYPE_ALC300;
|
|
+ spec->gen.mixer_nid = 0; /* no loopback on ALC300 */
|
|
+ break;
|
|
case 0x10ec0700:
|
|
case 0x10ec0701:
|
|
case 0x10ec0703:
|
|
@@ -8160,6 +8168,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
|
|
HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269),
|
|
HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
|
|
HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269),
|
|
+ HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269),
|
|
HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
|
|
HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
|
|
HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861),
|
|
diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
|
|
index e73c962590eb..079063d8038d 100644
|
|
--- a/sound/sparc/cs4231.c
|
|
+++ b/sound/sparc/cs4231.c
|
|
@@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream)
|
|
runtime->hw = snd_cs4231_playback;
|
|
|
|
err = snd_cs4231_open(chip, CS4231_MODE_PLAY);
|
|
- if (err < 0) {
|
|
- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
|
|
+ if (err < 0)
|
|
return err;
|
|
- }
|
|
chip->playback_substream = substream;
|
|
chip->p_periods_sent = 0;
|
|
snd_pcm_set_sync(substream);
|
|
@@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream)
|
|
runtime->hw = snd_cs4231_capture;
|
|
|
|
err = snd_cs4231_open(chip, CS4231_MODE_RECORD);
|
|
- if (err < 0) {
|
|
- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
|
|
+ if (err < 0)
|
|
return err;
|
|
- }
|
|
chip->capture_substream = substream;
|
|
chip->c_periods_sent = 0;
|
|
snd_pcm_set_sync(substream);
|