rebase patches
and drop those applied in 4.14/4.15
This commit is contained in:
parent
55f9bfa990
commit
15baf5b4c2
patches/kernel
0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch0002-bridge-keep-MAC-of-first-assigned-port.patch0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch0004-kvm-disable-default-dynamic-halt-polling-growth.patch0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch0005-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch0006-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch0007-KVM-x86-fix-APIC-page-invalidation.patch0007-ocfs2-make-metadata-estimation-accurate-and-clear.patch0008-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch0008-vhost-fix-skb-leak-in-handle_rx.patch0009-tun-free-skb-in-early-errors.patch0010-tap-free-skb-if-flags-error.patch0011-IB-core-Avoid-crash-on-pkey-enforcement-failed-in-re.patch0012-IB-core-Don-t-enforce-PKey-security-on-SMI-MADs.patch0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch0014-KVM-SVM-obey-guest-PAT.patch0015-net-sched-em_nbyte-don-t-add-the-data-offset-twice.patch0016-net-sched-fix-TCF_LAYER_LINK-case-in-tcf_get_base_pt.patch0017-i40e-Fix-memory-leak-related-filter-programming-stat.patch0018-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch0019-EDAC-sb_edac-Don-t-create-a-second-memory-controller.patch0020-EDAC-sb_edac-Fix-missing-break-in-switch.patch0021-scsi-lpfc-Fix-loop-mode-target-discovery.patch0022-sched-wait-Fix-add_wait_queue-behavioral-change.patch0023-module-retpoline-Warn-about-missing-retpoline-in-mod.patch0024-net-tcp-close-sock-if-net-namespace-is-exiting.patch0027-lockd-lost-rollback-of-set_grace_period-in-lockd_dow.patch
@ -20,10 +20,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
|
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
|
||||||
index fd8fdb91581d..1e35ac9fc810 100755
|
index 87f1fc9801d7..4ef868f1f244 100755
|
||||||
--- a/scripts/mkcompile_h
|
--- a/scripts/mkcompile_h
|
||||||
+++ b/scripts/mkcompile_h
|
+++ b/scripts/mkcompile_h
|
||||||
@@ -37,10 +37,14 @@ else
|
@@ -33,10 +33,14 @@ else
|
||||||
VERSION=$KBUILD_BUILD_VERSION
|
VERSION=$KBUILD_BUILD_VERSION
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||||||
1 file changed, 1 insertion(+), 4 deletions(-)
|
1 file changed, 1 insertion(+), 4 deletions(-)
|
||||||
|
|
||||||
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
|
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
|
||||||
index 89110319ef0f..5e73fff65f47 100644
|
index 808e2b914015..b0ad54384826 100644
|
||||||
--- a/net/bridge/br_stp_if.c
|
--- a/net/bridge/br_stp_if.c
|
||||||
+++ b/net/bridge/br_stp_if.c
|
+++ b/net/bridge/br_stp_if.c
|
||||||
@@ -259,10 +259,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
|
@@ -259,10 +259,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
|
||||||
|
@ -74,7 +74,7 @@ index 27ca3fbc47aa..5e3caff3fb49 100644
|
|||||||
Safety option to keep boot IRQs enabled. This
|
Safety option to keep boot IRQs enabled. This
|
||||||
should never be necessary.
|
should never be necessary.
|
||||||
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
|
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
|
||||||
index db82bef43b99..d338fdb7c402 100644
|
index db82bef43b99..ed94ba0d0922 100644
|
||||||
--- a/drivers/pci/quirks.c
|
--- a/drivers/pci/quirks.c
|
||||||
+++ b/drivers/pci/quirks.c
|
+++ b/drivers/pci/quirks.c
|
||||||
@@ -3695,6 +3695,106 @@ static int __init pci_apply_final_quirks(void)
|
@@ -3695,6 +3695,106 @@ static int __init pci_apply_final_quirks(void)
|
||||||
|
@ -12,7 +12,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
|
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
|
||||||
index 4d81f6ded88e..bfa9c4d34102 100644
|
index 210bf820385a..5b7e582f3742 100644
|
||||||
--- a/virt/kvm/kvm_main.c
|
--- a/virt/kvm/kvm_main.c
|
||||||
+++ b/virt/kvm/kvm_main.c
|
+++ b/virt/kvm/kvm_main.c
|
||||||
@@ -77,7 +77,7 @@ module_param(halt_poll_ns, uint, 0644);
|
@@ -77,7 +77,7 @@ module_param(halt_poll_ns, uint, 0644);
|
||||||
|
@ -1,66 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Waiman Long <longman@redhat.com>
|
|
||||||
Date: Thu, 17 Aug 2017 15:33:09 -0400
|
|
||||||
Subject: [PATCH] cgroup: Add mount flag to enable cpuset to use v2 behavior in
|
|
||||||
v1 cgroup
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
A new mount option "cpuset_v2_mode" is added to the v1 cgroupfs
|
|
||||||
filesystem to enable cpuset controller to use v2 behavior in a v1
|
|
||||||
cgroup. This mount option applies only to cpuset controller and have
|
|
||||||
no effect on other controllers.
|
|
||||||
|
|
||||||
Signed-off-by: Waiman Long <longman@redhat.com>
|
|
||||||
Signed-off-by: Tejun Heo <tj@kernel.org>
|
|
||||||
(cherry-picked from e1cba4b85daa71b710384d451ff6238d5e4d1ff6)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
include/linux/cgroup-defs.h | 5 +++++
|
|
||||||
kernel/cgroup/cgroup-v1.c | 6 ++++++
|
|
||||||
2 files changed, 11 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
|
|
||||||
index 09f4c7df1478..c344e77707a5 100644
|
|
||||||
--- a/include/linux/cgroup-defs.h
|
|
||||||
+++ b/include/linux/cgroup-defs.h
|
|
||||||
@@ -74,6 +74,11 @@ enum {
|
|
||||||
* aren't writeable from inside the namespace.
|
|
||||||
*/
|
|
||||||
CGRP_ROOT_NS_DELEGATE = (1 << 3),
|
|
||||||
+
|
|
||||||
+ /*
|
|
||||||
+ * Enable cpuset controller in v1 cgroup to use v2 behavior.
|
|
||||||
+ */
|
|
||||||
+ CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
|
|
||||||
};
|
|
||||||
|
|
||||||
/* cftype->flags */
|
|
||||||
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
|
|
||||||
index 7bf4b1533f34..ce7426b875f5 100644
|
|
||||||
--- a/kernel/cgroup/cgroup-v1.c
|
|
||||||
+++ b/kernel/cgroup/cgroup-v1.c
|
|
||||||
@@ -846,6 +846,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
|
|
||||||
seq_puts(seq, ",noprefix");
|
|
||||||
if (root->flags & CGRP_ROOT_XATTR)
|
|
||||||
seq_puts(seq, ",xattr");
|
|
||||||
+ if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
|
|
||||||
+ seq_puts(seq, ",cpuset_v2_mode");
|
|
||||||
|
|
||||||
spin_lock(&release_agent_path_lock);
|
|
||||||
if (strlen(root->release_agent_path))
|
|
||||||
@@ -900,6 +902,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
|
|
||||||
opts->cpuset_clone_children = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
+ if (!strcmp(token, "cpuset_v2_mode")) {
|
|
||||||
+ opts->flags |= CGRP_ROOT_CPUSET_V2_MODE;
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
if (!strcmp(token, "xattr")) {
|
|
||||||
opts->flags |= CGRP_ROOT_XATTR;
|
|
||||||
continue;
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -54,7 +54,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||||||
1 file changed, 4 insertions(+), 6 deletions(-)
|
1 file changed, 4 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
|
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
|
||||||
index 989a900383b5..e1a3ae4f3cab 100644
|
index 6a38c2503649..91813e686c67 100644
|
||||||
--- a/net/sctp/protocol.c
|
--- a/net/sctp/protocol.c
|
||||||
+++ b/net/sctp/protocol.c
|
+++ b/net/sctp/protocol.c
|
||||||
@@ -514,22 +514,20 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
|
@@ -514,22 +514,20 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
|
@ -1,141 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Waiman Long <longman@redhat.com>
|
|
||||||
Date: Thu, 17 Aug 2017 15:33:10 -0400
|
|
||||||
Subject: [PATCH] cpuset: Allow v2 behavior in v1 cgroup
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Cpuset v2 has some useful behaviors that are not present in v1 because
|
|
||||||
of backward compatibility concern. One of that is the restoration of
|
|
||||||
the original cpu and memory node mask after a hot removal and addition
|
|
||||||
event sequence.
|
|
||||||
|
|
||||||
This patch makes the cpuset controller to check the
|
|
||||||
CGRP_ROOT_CPUSET_V2_MODE flag and use the v2 behavior if it is set.
|
|
||||||
|
|
||||||
Signed-off-by: Waiman Long <longman@redhat.com>
|
|
||||||
Signed-off-by: Tejun Heo <tj@kernel.org>
|
|
||||||
(cherry-picked from b8d1b8ee93df8ffbabbeadd65d39853cfad6d698)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
kernel/cgroup/cpuset.c | 33 ++++++++++++++++++++-------------
|
|
||||||
1 file changed, 20 insertions(+), 13 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
|
|
||||||
index e8cb34193433..f76c4bf3d46a 100644
|
|
||||||
--- a/kernel/cgroup/cpuset.c
|
|
||||||
+++ b/kernel/cgroup/cpuset.c
|
|
||||||
@@ -299,6 +299,16 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
|
|
||||||
|
|
||||||
static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
|
|
||||||
|
|
||||||
+/*
|
|
||||||
+ * Cgroup v2 behavior is used when on default hierarchy or the
|
|
||||||
+ * cgroup_v2_mode flag is set.
|
|
||||||
+ */
|
|
||||||
+static inline bool is_in_v2_mode(void)
|
|
||||||
+{
|
|
||||||
+ return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
|
|
||||||
+ (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* This is ugly, but preserves the userspace API for existing cpuset
|
|
||||||
* users. If someone tries to mount the "cpuset" filesystem, we
|
|
||||||
@@ -489,8 +499,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
|
|
||||||
|
|
||||||
/* On legacy hiearchy, we must be a subset of our parent cpuset. */
|
|
||||||
ret = -EACCES;
|
|
||||||
- if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
|
||||||
- !is_cpuset_subset(trial, par))
|
|
||||||
+ if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
/*
|
|
||||||
@@ -896,8 +905,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
|
|
||||||
* If it becomes empty, inherit the effective mask of the
|
|
||||||
* parent, which is guaranteed to have some CPUs.
|
|
||||||
*/
|
|
||||||
- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
|
||||||
- cpumask_empty(new_cpus))
|
|
||||||
+ if (is_in_v2_mode() && cpumask_empty(new_cpus))
|
|
||||||
cpumask_copy(new_cpus, parent->effective_cpus);
|
|
||||||
|
|
||||||
/* Skip the whole subtree if the cpumask remains the same. */
|
|
||||||
@@ -914,7 +922,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
|
|
||||||
cpumask_copy(cp->effective_cpus, new_cpus);
|
|
||||||
spin_unlock_irq(&callback_lock);
|
|
||||||
|
|
||||||
- WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
|
||||||
+ WARN_ON(!is_in_v2_mode() &&
|
|
||||||
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
|
|
||||||
|
|
||||||
update_tasks_cpumask(cp);
|
|
||||||
@@ -1150,8 +1158,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
|
|
||||||
* If it becomes empty, inherit the effective mask of the
|
|
||||||
* parent, which is guaranteed to have some MEMs.
|
|
||||||
*/
|
|
||||||
- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
|
||||||
- nodes_empty(*new_mems))
|
|
||||||
+ if (is_in_v2_mode() && nodes_empty(*new_mems))
|
|
||||||
*new_mems = parent->effective_mems;
|
|
||||||
|
|
||||||
/* Skip the whole subtree if the nodemask remains the same. */
|
|
||||||
@@ -1168,7 +1175,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
|
|
||||||
cp->effective_mems = *new_mems;
|
|
||||||
spin_unlock_irq(&callback_lock);
|
|
||||||
|
|
||||||
- WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
|
||||||
+ WARN_ON(!is_in_v2_mode() &&
|
|
||||||
!nodes_equal(cp->mems_allowed, cp->effective_mems));
|
|
||||||
|
|
||||||
update_tasks_nodemask(cp);
|
|
||||||
@@ -1460,7 +1467,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
|
|
||||||
|
|
||||||
/* allow moving tasks into an empty cpuset if on default hierarchy */
|
|
||||||
ret = -ENOSPC;
|
|
||||||
- if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
|
||||||
+ if (!is_in_v2_mode() &&
|
|
||||||
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
|
|
||||||
goto out_unlock;
|
|
||||||
|
|
||||||
@@ -1979,7 +1986,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
|
|
||||||
cpuset_inc();
|
|
||||||
|
|
||||||
spin_lock_irq(&callback_lock);
|
|
||||||
- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
|
|
||||||
+ if (is_in_v2_mode()) {
|
|
||||||
cpumask_copy(cs->effective_cpus, parent->effective_cpus);
|
|
||||||
cs->effective_mems = parent->effective_mems;
|
|
||||||
}
|
|
||||||
@@ -2056,7 +2063,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
|
|
||||||
mutex_lock(&cpuset_mutex);
|
|
||||||
spin_lock_irq(&callback_lock);
|
|
||||||
|
|
||||||
- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
|
|
||||||
+ if (is_in_v2_mode()) {
|
|
||||||
cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
|
|
||||||
top_cpuset.mems_allowed = node_possible_map;
|
|
||||||
} else {
|
|
||||||
@@ -2250,7 +2257,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs)
|
|
||||||
cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
|
|
||||||
mems_updated = !nodes_equal(new_mems, cs->effective_mems);
|
|
||||||
|
|
||||||
- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
|
|
||||||
+ if (is_in_v2_mode())
|
|
||||||
hotplug_update_tasks(cs, &new_cpus, &new_mems,
|
|
||||||
cpus_updated, mems_updated);
|
|
||||||
else
|
|
||||||
@@ -2288,7 +2295,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
|
|
||||||
static cpumask_t new_cpus;
|
|
||||||
static nodemask_t new_mems;
|
|
||||||
bool cpus_updated, mems_updated;
|
|
||||||
- bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
|
|
||||||
+ bool on_dfl = is_in_v2_mode();
|
|
||||||
|
|
||||||
mutex_lock(&cpuset_mutex);
|
|
||||||
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -27,7 +27,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
|
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
|
||||||
index a4b6ffb61495..c5a5ad8ac00f 100644
|
index 5d4c15bf66d2..e35d4f73d2df 100644
|
||||||
--- a/net/sctp/ipv6.c
|
--- a/net/sctp/ipv6.c
|
||||||
+++ b/net/sctp/ipv6.c
|
+++ b/net/sctp/ipv6.c
|
||||||
@@ -326,8 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
|
@@ -326,8 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
|
@ -1,90 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
|
|
||||||
Date: Thu, 30 Nov 2017 19:05:45 +0100
|
|
||||||
Subject: [PATCH] KVM: x86: fix APIC page invalidation
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Implementation of the unpinned APIC page didn't update the VMCS address
|
|
||||||
cache when invalidation was done through range mmu notifiers.
|
|
||||||
This became a problem when the page notifier was removed.
|
|
||||||
|
|
||||||
Re-introduce the arch-specific helper and call it from ...range_start.
|
|
||||||
|
|
||||||
Fixes: 38b9917350cb ("kvm: vmx: Implement set_apic_access_page_addr")
|
|
||||||
Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2")
|
|
||||||
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
arch/x86/include/asm/kvm_host.h | 3 +++
|
|
||||||
arch/x86/kvm/x86.c | 14 ++++++++++++++
|
|
||||||
virt/kvm/kvm_main.c | 8 ++++++++
|
|
||||||
3 files changed, 25 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
|
|
||||||
index 78ec3cda9429..1953c0a5b972 100644
|
|
||||||
--- a/arch/x86/include/asm/kvm_host.h
|
|
||||||
+++ b/arch/x86/include/asm/kvm_host.h
|
|
||||||
@@ -1439,4 +1439,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
|
||||||
+ unsigned long start, unsigned long end);
|
|
||||||
+
|
|
||||||
#endif /* _ASM_X86_KVM_HOST_H */
|
|
||||||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
|
|
||||||
index f896c441fc2c..eae4aecf3cfe 100644
|
|
||||||
--- a/arch/x86/kvm/x86.c
|
|
||||||
+++ b/arch/x86/kvm/x86.c
|
|
||||||
@@ -6711,6 +6711,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
|
|
||||||
kvm_x86_ops->tlb_flush(vcpu);
|
|
||||||
}
|
|
||||||
|
|
||||||
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
|
||||||
+ unsigned long start, unsigned long end)
|
|
||||||
+{
|
|
||||||
+ unsigned long apic_address;
|
|
||||||
+
|
|
||||||
+ /*
|
|
||||||
+ * The physical address of apic access page is stored in the VMCS.
|
|
||||||
+ * Update it when it becomes invalid.
|
|
||||||
+ */
|
|
||||||
+ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
|
|
||||||
+ if (start <= apic_address && apic_address < end)
|
|
||||||
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
struct page *page = NULL;
|
|
||||||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
|
|
||||||
index bfa9c4d34102..d0085c9d6297 100644
|
|
||||||
--- a/virt/kvm/kvm_main.c
|
|
||||||
+++ b/virt/kvm/kvm_main.c
|
|
||||||
@@ -136,6 +136,11 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
|
|
||||||
static unsigned long long kvm_createvm_count;
|
|
||||||
static unsigned long long kvm_active_vms;
|
|
||||||
|
|
||||||
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
|
||||||
+ unsigned long start, unsigned long end)
|
|
||||||
+{
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
|
|
||||||
{
|
|
||||||
if (pfn_valid(pfn))
|
|
||||||
@@ -361,6 +366,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
|
||||||
kvm_flush_remote_tlbs(kvm);
|
|
||||||
|
|
||||||
spin_unlock(&kvm->mmu_lock);
|
|
||||||
+
|
|
||||||
+ kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
|
|
||||||
+
|
|
||||||
srcu_read_unlock(&kvm->srcu, idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -28,10 +28,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
|
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
|
||||||
index 88a31e9340a0..77ec9b495027 100644
|
index d1516327b787..256986aca8df 100644
|
||||||
--- a/fs/ocfs2/aops.c
|
--- a/fs/ocfs2/aops.c
|
||||||
+++ b/fs/ocfs2/aops.c
|
+++ b/fs/ocfs2/aops.c
|
||||||
@@ -784,6 +784,7 @@ struct ocfs2_write_ctxt {
|
@@ -797,6 +797,7 @@ struct ocfs2_write_ctxt {
|
||||||
struct ocfs2_cached_dealloc_ctxt w_dealloc;
|
struct ocfs2_cached_dealloc_ctxt w_dealloc;
|
||||||
|
|
||||||
struct list_head w_unwritten_list;
|
struct list_head w_unwritten_list;
|
||||||
@ -39,7 +39,7 @@ index 88a31e9340a0..77ec9b495027 100644
|
|||||||
};
|
};
|
||||||
|
|
||||||
void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
|
void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
|
||||||
@@ -1373,6 +1374,7 @@ static int ocfs2_unwritten_check(struct inode *inode,
|
@@ -1386,6 +1387,7 @@ static int ocfs2_unwritten_check(struct inode *inode,
|
||||||
desc->c_clear_unwritten = 0;
|
desc->c_clear_unwritten = 0;
|
||||||
list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list);
|
list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list);
|
||||||
list_add_tail(&new->ue_node, &wc->w_unwritten_list);
|
list_add_tail(&new->ue_node, &wc->w_unwritten_list);
|
||||||
@ -47,7 +47,7 @@ index 88a31e9340a0..77ec9b495027 100644
|
|||||||
new = NULL;
|
new = NULL;
|
||||||
unlock:
|
unlock:
|
||||||
spin_unlock(&oi->ip_lock);
|
spin_unlock(&oi->ip_lock);
|
||||||
@@ -2246,7 +2248,7 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
|
@@ -2256,7 +2258,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
|
||||||
ue->ue_phys = desc->c_phys;
|
ue->ue_phys = desc->c_phys;
|
||||||
|
|
||||||
list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list);
|
list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list);
|
@ -72,7 +72,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|||||||
3 files changed, 203 insertions(+), 10 deletions(-)
|
3 files changed, 203 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
|
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
|
||||||
index 4a5152ec88a3..571692171dd1 100644
|
index 27b75cf32cfa..250bcacdf9e9 100644
|
||||||
--- a/fs/ocfs2/alloc.h
|
--- a/fs/ocfs2/alloc.h
|
||||||
+++ b/fs/ocfs2/alloc.h
|
+++ b/fs/ocfs2/alloc.h
|
||||||
@@ -61,6 +61,7 @@ struct ocfs2_extent_tree {
|
@@ -61,6 +61,7 @@ struct ocfs2_extent_tree {
|
||||||
@ -84,7 +84,7 @@ index 4a5152ec88a3..571692171dd1 100644
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
|
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
|
||||||
index 386aecce881d..9b5e7d8ba710 100644
|
index ab5105f9767e..2f2c76193f54 100644
|
||||||
--- a/fs/ocfs2/alloc.c
|
--- a/fs/ocfs2/alloc.c
|
||||||
+++ b/fs/ocfs2/alloc.c
|
+++ b/fs/ocfs2/alloc.c
|
||||||
@@ -165,6 +165,13 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
|
@@ -165,6 +165,13 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
|
||||||
@ -109,7 +109,7 @@ index 386aecce881d..9b5e7d8ba710 100644
|
|||||||
|
|
||||||
et->et_ops->eo_fill_root_el(et);
|
et->et_ops->eo_fill_root_el(et);
|
||||||
if (!et->et_ops->eo_fill_max_leaf_clusters)
|
if (!et->et_ops->eo_fill_max_leaf_clusters)
|
||||||
@@ -1159,7 +1167,7 @@ static int ocfs2_add_branch(handle_t *handle,
|
@@ -1158,7 +1166,7 @@ static int ocfs2_add_branch(handle_t *handle,
|
||||||
struct buffer_head **last_eb_bh,
|
struct buffer_head **last_eb_bh,
|
||||||
struct ocfs2_alloc_context *meta_ac)
|
struct ocfs2_alloc_context *meta_ac)
|
||||||
{
|
{
|
||||||
@ -118,7 +118,7 @@ index 386aecce881d..9b5e7d8ba710 100644
|
|||||||
u64 next_blkno, new_last_eb_blk;
|
u64 next_blkno, new_last_eb_blk;
|
||||||
struct buffer_head *bh;
|
struct buffer_head *bh;
|
||||||
struct buffer_head **new_eb_bhs = NULL;
|
struct buffer_head **new_eb_bhs = NULL;
|
||||||
@@ -1214,11 +1222,31 @@ static int ocfs2_add_branch(handle_t *handle,
|
@@ -1213,11 +1221,31 @@ static int ocfs2_add_branch(handle_t *handle,
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -155,7 +155,7 @@ index 386aecce881d..9b5e7d8ba710 100644
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
|
/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
|
||||||
@@ -1341,15 +1369,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
|
@@ -1340,15 +1368,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
|
||||||
struct ocfs2_alloc_context *meta_ac,
|
struct ocfs2_alloc_context *meta_ac,
|
||||||
struct buffer_head **ret_new_eb_bh)
|
struct buffer_head **ret_new_eb_bh)
|
||||||
{
|
{
|
||||||
@ -184,7 +184,7 @@ index 386aecce881d..9b5e7d8ba710 100644
|
|||||||
if (status < 0) {
|
if (status < 0) {
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
goto bail;
|
goto bail;
|
||||||
@@ -1512,7 +1550,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
|
@@ -1511,7 +1549,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
|
||||||
int depth = le16_to_cpu(el->l_tree_depth);
|
int depth = le16_to_cpu(el->l_tree_depth);
|
||||||
struct buffer_head *bh = NULL;
|
struct buffer_head *bh = NULL;
|
||||||
|
|
||||||
@ -193,7 +193,7 @@ index 386aecce881d..9b5e7d8ba710 100644
|
|||||||
|
|
||||||
shift = ocfs2_find_branch_target(et, &bh);
|
shift = ocfs2_find_branch_target(et, &bh);
|
||||||
if (shift < 0) {
|
if (shift < 0) {
|
||||||
@@ -6593,6 +6631,154 @@ ocfs2_find_per_slot_free_list(int type,
|
@@ -6585,6 +6623,154 @@ ocfs2_find_per_slot_free_list(int type,
|
||||||
return fl;
|
return fl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -349,10 +349,10 @@ index 386aecce881d..9b5e7d8ba710 100644
|
|||||||
int type, int slot, u64 suballoc,
|
int type, int slot, u64 suballoc,
|
||||||
u64 blkno, unsigned int bit)
|
u64 blkno, unsigned int bit)
|
||||||
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
|
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
|
||||||
index 77ec9b495027..2ff02dda97d8 100644
|
index 256986aca8df..e8e205bf2e41 100644
|
||||||
--- a/fs/ocfs2/aops.c
|
--- a/fs/ocfs2/aops.c
|
||||||
+++ b/fs/ocfs2/aops.c
|
+++ b/fs/ocfs2/aops.c
|
||||||
@@ -2322,6 +2322,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
|
@@ -2332,6 +2332,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
|
||||||
|
|
||||||
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
|
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
|
||||||
|
|
@ -1,72 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Wei Xu <wexu@redhat.com>
|
|
||||||
Date: Fri, 1 Dec 2017 05:10:36 -0500
|
|
||||||
Subject: [PATCH] vhost: fix skb leak in handle_rx()
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Matthew found a roughly 40% tcp throughput regression with commit
|
|
||||||
c67df11f(vhost_net: try batch dequing from skb array) as discussed
|
|
||||||
in the following thread:
|
|
||||||
https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html
|
|
||||||
|
|
||||||
Eventually we figured out that it was a skb leak in handle_rx()
|
|
||||||
when sending packets to the VM. This usually happens when a guest
|
|
||||||
can not drain out vq as fast as vhost fills in, afterwards it sets
|
|
||||||
off the traffic jam and leaks skb(s) which occurs as no headcount
|
|
||||||
to send on the vq from vhost side.
|
|
||||||
|
|
||||||
This can be avoided by making sure we have got enough headcount
|
|
||||||
before actually consuming a skb from the batched rx array while
|
|
||||||
transmitting, which is simply done by moving checking the zero
|
|
||||||
headcount a bit ahead.
|
|
||||||
|
|
||||||
Signed-off-by: Wei Xu <wexu@redhat.com>
|
|
||||||
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/vhost/net.c | 20 ++++++++++----------
|
|
||||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
|
|
||||||
index 1c75572f5a3f..010253847022 100644
|
|
||||||
--- a/drivers/vhost/net.c
|
|
||||||
+++ b/drivers/vhost/net.c
|
|
||||||
@@ -781,16 +781,6 @@ static void handle_rx(struct vhost_net *net)
|
|
||||||
/* On error, stop handling until the next kick. */
|
|
||||||
if (unlikely(headcount < 0))
|
|
||||||
goto out;
|
|
||||||
- if (nvq->rx_array)
|
|
||||||
- msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
|
|
||||||
- /* On overrun, truncate and discard */
|
|
||||||
- if (unlikely(headcount > UIO_MAXIOV)) {
|
|
||||||
- iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
|
|
||||||
- err = sock->ops->recvmsg(sock, &msg,
|
|
||||||
- 1, MSG_DONTWAIT | MSG_TRUNC);
|
|
||||||
- pr_debug("Discarded rx packet: len %zd\n", sock_len);
|
|
||||||
- continue;
|
|
||||||
- }
|
|
||||||
/* OK, now we need to know about added descriptors. */
|
|
||||||
if (!headcount) {
|
|
||||||
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
|
|
||||||
@@ -803,6 +793,16 @@ static void handle_rx(struct vhost_net *net)
|
|
||||||
* they refilled. */
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
+ if (nvq->rx_array)
|
|
||||||
+ msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
|
|
||||||
+ /* On overrun, truncate and discard */
|
|
||||||
+ if (unlikely(headcount > UIO_MAXIOV)) {
|
|
||||||
+ iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
|
|
||||||
+ err = sock->ops->recvmsg(sock, &msg,
|
|
||||||
+ 1, MSG_DONTWAIT | MSG_TRUNC);
|
|
||||||
+ pr_debug("Discarded rx packet: len %zd\n", sock_len);
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
/* We don't need to be notified again. */
|
|
||||||
iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
|
|
||||||
fixup = msg.msg_iter;
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,86 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Wei Xu <wexu@redhat.com>
|
|
||||||
Date: Fri, 1 Dec 2017 05:10:37 -0500
|
|
||||||
Subject: [PATCH] tun: free skb in early errors
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
tun_recvmsg() supports accepting skb by msg_control after
|
|
||||||
commit ac77cfd4258f ("tun: support receiving skb through msg_control"),
|
|
||||||
the skb if presented should be freed no matter how far it can go
|
|
||||||
along, otherwise it would be leaked.
|
|
||||||
|
|
||||||
This patch fixes several missed cases.
|
|
||||||
|
|
||||||
Signed-off-by: Wei Xu <wexu@redhat.com>
|
|
||||||
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/net/tun.c | 24 ++++++++++++++++++------
|
|
||||||
1 file changed, 18 insertions(+), 6 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
|
|
||||||
index d1cb1ff83251..d58ae8ad0a4e 100644
|
|
||||||
--- a/drivers/net/tun.c
|
|
||||||
+++ b/drivers/net/tun.c
|
|
||||||
@@ -1519,8 +1519,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
|
|
||||||
|
|
||||||
tun_debug(KERN_INFO, tun, "tun_do_read\n");
|
|
||||||
|
|
||||||
- if (!iov_iter_count(to))
|
|
||||||
+ if (!iov_iter_count(to)) {
|
|
||||||
+ if (skb)
|
|
||||||
+ kfree_skb(skb);
|
|
||||||
return 0;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
if (!skb) {
|
|
||||||
/* Read frames from ring */
|
|
||||||
@@ -1636,22 +1639,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
|
|
||||||
{
|
|
||||||
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
|
|
||||||
struct tun_struct *tun = __tun_get(tfile);
|
|
||||||
+ struct sk_buff *skb = m->msg_control;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
- if (!tun)
|
|
||||||
- return -EBADFD;
|
|
||||||
+ if (!tun) {
|
|
||||||
+ ret = -EBADFD;
|
|
||||||
+ goto out_free_skb;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
|
|
||||||
ret = -EINVAL;
|
|
||||||
- goto out;
|
|
||||||
+ goto out_put_tun;
|
|
||||||
}
|
|
||||||
if (flags & MSG_ERRQUEUE) {
|
|
||||||
ret = sock_recv_errqueue(sock->sk, m, total_len,
|
|
||||||
SOL_PACKET, TUN_TX_TIMESTAMP);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
- ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
|
|
||||||
- m->msg_control);
|
|
||||||
+ ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
|
|
||||||
if (ret > (ssize_t)total_len) {
|
|
||||||
m->msg_flags |= MSG_TRUNC;
|
|
||||||
ret = flags & MSG_TRUNC ? ret : total_len;
|
|
||||||
@@ -1659,6 +1664,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
|
|
||||||
out:
|
|
||||||
tun_put(tun);
|
|
||||||
return ret;
|
|
||||||
+
|
|
||||||
+out_put_tun:
|
|
||||||
+ tun_put(tun);
|
|
||||||
+out_free_skb:
|
|
||||||
+ if (skb)
|
|
||||||
+ kfree_skb(skb);
|
|
||||||
+ return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int tun_peek_len(struct socket *sock)
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,58 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Wei Xu <wexu@redhat.com>
|
|
||||||
Date: Fri, 1 Dec 2017 05:10:38 -0500
|
|
||||||
Subject: [PATCH] tap: free skb if flags error
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
tap_recvmsg() supports accepting skb by msg_control after
|
|
||||||
commit 3b4ba04acca8 ("tap: support receiving skb from msg_control"),
|
|
||||||
the skb if presented should be freed within the function, otherwise
|
|
||||||
it would be leaked.
|
|
||||||
|
|
||||||
Signed-off-by: Wei Xu <wexu@redhat.com>
|
|
||||||
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/net/tap.c | 14 ++++++++++----
|
|
||||||
1 file changed, 10 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
|
|
||||||
index 7a2f6bebfd15..96e5e5b2ae39 100644
|
|
||||||
--- a/drivers/net/tap.c
|
|
||||||
+++ b/drivers/net/tap.c
|
|
||||||
@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q,
|
|
||||||
DEFINE_WAIT(wait);
|
|
||||||
ssize_t ret = 0;
|
|
||||||
|
|
||||||
- if (!iov_iter_count(to))
|
|
||||||
+ if (!iov_iter_count(to)) {
|
|
||||||
+ if (skb)
|
|
||||||
+ kfree_skb(skb);
|
|
||||||
return 0;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
if (skb)
|
|
||||||
goto put;
|
|
||||||
@@ -1157,11 +1160,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m,
|
|
||||||
size_t total_len, int flags)
|
|
||||||
{
|
|
||||||
struct tap_queue *q = container_of(sock, struct tap_queue, sock);
|
|
||||||
+ struct sk_buff *skb = m->msg_control;
|
|
||||||
int ret;
|
|
||||||
- if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
|
|
||||||
+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
|
|
||||||
+ if (skb)
|
|
||||||
+ kfree_skb(skb);
|
|
||||||
return -EINVAL;
|
|
||||||
- ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
|
|
||||||
- m->msg_control);
|
|
||||||
+ }
|
|
||||||
+ ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb);
|
|
||||||
if (ret > total_len) {
|
|
||||||
m->msg_flags |= MSG_TRUNC;
|
|
||||||
ret = flags & MSG_TRUNC ? ret : total_len;
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,93 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Parav Pandit <parav@mellanox.com>
|
|
||||||
Date: Fri, 5 Jan 2018 23:51:12 +0100
|
|
||||||
Subject: [PATCH] IB/core: Avoid crash on pkey enforcement failed in received
|
|
||||||
MADs
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
commit 89548bcafec7ecfeea58c553f0834b5d575a66eb upstream.
|
|
||||||
|
|
||||||
Below kernel crash is observed when Pkey security enforcement fails on
|
|
||||||
received MADs. This issue is reported in [1].
|
|
||||||
|
|
||||||
ib_free_recv_mad() accesses the rmpp_list, whose initialization is
|
|
||||||
needed before accessing it.
|
|
||||||
When security enforcement fails on received MADs, MAD processing is skipped
|
|
||||||
because the security checks failed.
|
|
||||||
|
|
||||||
OpenSM[3770]: SM port is down
|
|
||||||
kernel: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
|
|
||||||
kernel: IP: ib_free_recv_mad+0x44/0xa0 [ib_core]
|
|
||||||
kernel: PGD 0
|
|
||||||
kernel: P4D 0
|
|
||||||
kernel:
|
|
||||||
kernel: Oops: 0002 [#1] SMP
|
|
||||||
kernel: CPU: 0 PID: 2833 Comm: kworker/0:1H Tainted: P IO 4.13.4-1-pve #1
|
|
||||||
kernel: Hardware name: Dell XS23-TY3 /9CMP63, BIOS 1.71 09/17/2013
|
|
||||||
kernel: Workqueue: ib-comp-wq ib_cq_poll_work [ib_core]
|
|
||||||
kernel: task: ffffa069c6541600 task.stack: ffffb9a729054000
|
|
||||||
kernel: RIP: 0010:ib_free_recv_mad+0x44/0xa0 [ib_core]
|
|
||||||
kernel: RSP: 0018:ffffb9a729057d38 EFLAGS: 00010286
|
|
||||||
kernel: RAX: ffffa069cb138a48 RBX: ffffa069cb138a10 RCX: 0000000000000000
|
|
||||||
kernel: RDX: ffffb9a729057d38 RSI: 0000000000000000 RDI: ffffa069cb138a20
|
|
||||||
kernel: RBP: ffffb9a729057d60 R08: ffffa072d2d49800 R09: ffffa069cb138ae0
|
|
||||||
kernel: R10: ffffa069cb138ae0 R11: ffffa072b3994e00 R12: ffffb9a729057d38
|
|
||||||
kernel: R13: ffffa069d1c90000 R14: 0000000000000000 R15: ffffa069d1c90880
|
|
||||||
kernel: FS: 0000000000000000(0000) GS:ffffa069dba00000(0000) knlGS:0000000000000000
|
|
||||||
kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
|
|
||||||
kernel: CR2: 0000000000000008 CR3: 00000011f51f2000 CR4: 00000000000006f0
|
|
||||||
kernel: Call Trace:
|
|
||||||
kernel: ib_mad_recv_done+0x5cc/0xb50 [ib_core]
|
|
||||||
kernel: __ib_process_cq+0x5c/0xb0 [ib_core]
|
|
||||||
kernel: ib_cq_poll_work+0x20/0x60 [ib_core]
|
|
||||||
kernel: process_one_work+0x1e9/0x410
|
|
||||||
kernel: worker_thread+0x4b/0x410
|
|
||||||
kernel: kthread+0x109/0x140
|
|
||||||
kernel: ? process_one_work+0x410/0x410
|
|
||||||
kernel: ? kthread_create_on_node+0x70/0x70
|
|
||||||
kernel: ? SyS_exit_group+0x14/0x20
|
|
||||||
kernel: ret_from_fork+0x25/0x30
|
|
||||||
kernel: RIP: ib_free_recv_mad+0x44/0xa0 [ib_core] RSP: ffffb9a729057d38
|
|
||||||
kernel: CR2: 0000000000000008
|
|
||||||
|
|
||||||
[1] : https://www.spinics.net/lists/linux-rdma/msg56190.html
|
|
||||||
|
|
||||||
Fixes: 47a2b338fe63 ("IB/core: Enforce security on management datagrams")
|
|
||||||
Signed-off-by: Parav Pandit <parav@mellanox.com>
|
|
||||||
Reported-by: Chris Blake <chrisrblake93@gmail.com>
|
|
||||||
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
|
|
||||||
Reviewed-by: Hal Rosenstock <hal@mellanox.com>
|
|
||||||
Signed-off-by: Doug Ledford <dledford@redhat.com>
|
|
||||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
|
||||||
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/infiniband/core/mad.c | 3 ++-
|
|
||||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
|
|
||||||
index f8f53bb90837..cb91245e9163 100644
|
|
||||||
--- a/drivers/infiniband/core/mad.c
|
|
||||||
+++ b/drivers/infiniband/core/mad.c
|
|
||||||
@@ -1974,14 +1974,15 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
|
|
||||||
unsigned long flags;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
+ INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
|
|
||||||
ret = ib_mad_enforce_security(mad_agent_priv,
|
|
||||||
mad_recv_wc->wc->pkey_index);
|
|
||||||
if (ret) {
|
|
||||||
ib_free_recv_mad(mad_recv_wc);
|
|
||||||
deref_mad_agent(mad_agent_priv);
|
|
||||||
+ return;
|
|
||||||
}
|
|
||||||
|
|
||||||
- INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
|
|
||||||
list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
|
|
||||||
if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
|
|
||||||
mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Daniel Jurgens <danielj@mellanox.com>
|
|
||||||
Date: Mon, 20 Nov 2017 16:47:45 -0600
|
|
||||||
Subject: [PATCH] IB/core: Don't enforce PKey security on SMI MADs
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Per the infiniband spec an SMI MAD can have any PKey. Checking the pkey
|
|
||||||
on SMI MADs is not necessary, and it seems that some older adapters
|
|
||||||
using the mthca driver don't follow the convention of using the default
|
|
||||||
PKey, resulting in false denials, or errors querying the PKey cache.
|
|
||||||
|
|
||||||
SMI MAD security is still enforced, only agents allowed to manage the
|
|
||||||
subnet are able to receive or send SMI MADs.
|
|
||||||
|
|
||||||
Reported-by: Chris Blake <chrisrblake93@gmail.com>
|
|
||||||
Fixes: 47a2b338fe63 ("IB/core: Enforce security on management datagrams")
|
|
||||||
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
|
|
||||||
Reviewed-by: Parav Pandit <parav@mellanox.com>
|
|
||||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/infiniband/core/security.c | 7 +++++--
|
|
||||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
|
|
||||||
index 70ad19c4c73e..8f9fd3b757db 100644
|
|
||||||
--- a/drivers/infiniband/core/security.c
|
|
||||||
+++ b/drivers/infiniband/core/security.c
|
|
||||||
@@ -692,8 +692,11 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
- if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed)
|
|
||||||
- return -EACCES;
|
|
||||||
+ if (map->agent.qp->qp_type == IB_QPT_SMI) {
|
|
||||||
+ if (!map->agent.smp_allowed)
|
|
||||||
+ return -EACCES;
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
ret = ib_security_pkey_access(map->agent.device,
|
|
||||||
map->agent.port_num,
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,299 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Date: Mon, 6 Nov 2017 13:31:12 +0100
|
|
||||||
Subject: [PATCH] kvm: vmx: Reinstate support for CPUs without virtual NMI
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
commit 8a1b43922d0d1279e7936ba85c4c2a870403c95f upstream.
|
|
||||||
|
|
||||||
This is more or less a revert of commit 2c82878b0cb3 ("KVM: VMX: require
|
|
||||||
virtual NMI support", 2017-03-27); it turns out that Core 2 Duo machines
|
|
||||||
only had virtual NMIs in some SKUs.
|
|
||||||
|
|
||||||
The revert is not trivial because in the meanwhile there have been several
|
|
||||||
fixes to nested NMI injection. Therefore, the entire vNMI state is moved
|
|
||||||
to struct loaded_vmcs.
|
|
||||||
|
|
||||||
Another change compared to before the patch is a simplification here:
|
|
||||||
|
|
||||||
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
|
|
||||||
!(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
|
|
||||||
get_vmcs12(vcpu))))) {
|
|
||||||
|
|
||||||
The final condition here is always true (because nested_cpu_has_virtual_nmis
|
|
||||||
is always false) and is removed.
|
|
||||||
|
|
||||||
Fixes: 2c82878b0cb38fd516fd612c67852a6bbf282003
|
|
||||||
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1490803
|
|
||||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
|
|
||||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
arch/x86/kvm/vmx.c | 150 +++++++++++++++++++++++++++++++++++++----------------
|
|
||||||
1 file changed, 106 insertions(+), 44 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
|
|
||||||
index 5edf05ce45de..146caacd8fdd 100644
|
|
||||||
--- a/arch/x86/kvm/vmx.c
|
|
||||||
+++ b/arch/x86/kvm/vmx.c
|
|
||||||
@@ -204,6 +204,10 @@ struct loaded_vmcs {
|
|
||||||
bool nmi_known_unmasked;
|
|
||||||
unsigned long vmcs_host_cr3; /* May not match real cr3 */
|
|
||||||
unsigned long vmcs_host_cr4; /* May not match real cr4 */
|
|
||||||
+ /* Support for vnmi-less CPUs */
|
|
||||||
+ int soft_vnmi_blocked;
|
|
||||||
+ ktime_t entry_time;
|
|
||||||
+ s64 vnmi_blocked_time;
|
|
||||||
struct list_head loaded_vmcss_on_cpu_link;
|
|
||||||
};
|
|
||||||
|
|
||||||
@@ -1290,6 +1294,11 @@ static inline bool cpu_has_vmx_invpcid(void)
|
|
||||||
SECONDARY_EXEC_ENABLE_INVPCID;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static inline bool cpu_has_virtual_nmis(void)
|
|
||||||
+{
|
|
||||||
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static inline bool cpu_has_vmx_wbinvd_exit(void)
|
|
||||||
{
|
|
||||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
|
||||||
@@ -1341,11 +1350,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
|
|
||||||
(vmcs12->secondary_vm_exec_control & bit);
|
|
||||||
}
|
|
||||||
|
|
||||||
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
|
|
||||||
-{
|
|
||||||
- return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
|
|
||||||
{
|
|
||||||
return vmcs12->pin_based_vm_exec_control &
|
|
||||||
@@ -3687,9 +3691,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
|
||||||
&_vmexit_control) < 0)
|
|
||||||
return -EIO;
|
|
||||||
|
|
||||||
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
|
|
||||||
- PIN_BASED_VIRTUAL_NMIS;
|
|
||||||
- opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
|
|
||||||
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
|
|
||||||
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
|
|
||||||
+ PIN_BASED_VMX_PREEMPTION_TIMER;
|
|
||||||
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
|
|
||||||
&_pin_based_exec_control) < 0)
|
|
||||||
return -EIO;
|
|
||||||
@@ -5549,7 +5553,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
|
|
||||||
|
|
||||||
static void enable_nmi_window(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
|
|
||||||
+ if (!cpu_has_virtual_nmis() ||
|
|
||||||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
|
|
||||||
enable_irq_window(vcpu);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
@@ -5589,6 +5594,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
||||||
|
|
||||||
+ if (!cpu_has_virtual_nmis()) {
|
|
||||||
+ /*
|
|
||||||
+ * Tracking the NMI-blocked state in software is built upon
|
|
||||||
+ * finding the next open IRQ window. This, in turn, depends on
|
|
||||||
+ * well-behaving guests: They have to keep IRQs disabled at
|
|
||||||
+ * least as long as the NMI handler runs. Otherwise we may
|
|
||||||
+ * cause NMI nesting, maybe breaking the guest. But as this is
|
|
||||||
+ * highly unlikely, we can live with the residual risk.
|
|
||||||
+ */
|
|
||||||
+ vmx->loaded_vmcs->soft_vnmi_blocked = 1;
|
|
||||||
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
++vcpu->stat.nmi_injections;
|
|
||||||
vmx->loaded_vmcs->nmi_known_unmasked = false;
|
|
||||||
|
|
||||||
@@ -5607,6 +5625,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
|
|
||||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
||||||
bool masked;
|
|
||||||
|
|
||||||
+ if (!cpu_has_virtual_nmis())
|
|
||||||
+ return vmx->loaded_vmcs->soft_vnmi_blocked;
|
|
||||||
if (vmx->loaded_vmcs->nmi_known_unmasked)
|
|
||||||
return false;
|
|
||||||
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
|
|
||||||
@@ -5618,13 +5638,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
|
|
||||||
{
|
|
||||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
||||||
|
|
||||||
- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
|
|
||||||
- if (masked)
|
|
||||||
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
||||||
- GUEST_INTR_STATE_NMI);
|
|
||||||
- else
|
|
||||||
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
||||||
- GUEST_INTR_STATE_NMI);
|
|
||||||
+ if (!cpu_has_virtual_nmis()) {
|
|
||||||
+ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
|
|
||||||
+ vmx->loaded_vmcs->soft_vnmi_blocked = masked;
|
|
||||||
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
|
|
||||||
+ }
|
|
||||||
+ } else {
|
|
||||||
+ vmx->loaded_vmcs->nmi_known_unmasked = !masked;
|
|
||||||
+ if (masked)
|
|
||||||
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
||||||
+ GUEST_INTR_STATE_NMI);
|
|
||||||
+ else
|
|
||||||
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
||||||
+ GUEST_INTR_STATE_NMI);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
|
|
||||||
@@ -5632,6 +5659,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
|
|
||||||
if (to_vmx(vcpu)->nested.nested_run_pending)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
+ if (!cpu_has_virtual_nmis() &&
|
|
||||||
+ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
|
|
||||||
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
|
|
||||||
| GUEST_INTR_STATE_NMI));
|
|
||||||
@@ -6360,6 +6391,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
|
|
||||||
* AAK134, BY25.
|
|
||||||
*/
|
|
||||||
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
|
|
||||||
+ cpu_has_virtual_nmis() &&
|
|
||||||
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
|
|
||||||
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
|
|
||||||
|
|
||||||
@@ -6834,7 +6866,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Create a new VMCS */
|
|
||||||
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
|
|
||||||
+ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
|
|
||||||
if (!item)
|
|
||||||
return NULL;
|
|
||||||
item->vmcs02.vmcs = alloc_vmcs();
|
|
||||||
@@ -7851,6 +7883,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
|
|
||||||
* "blocked by NMI" bit has to be set before next VM entry.
|
|
||||||
*/
|
|
||||||
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
|
|
||||||
+ cpu_has_virtual_nmis() &&
|
|
||||||
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
|
|
||||||
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
||||||
GUEST_INTR_STATE_NMI);
|
|
||||||
@@ -8568,6 +8601,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ if (unlikely(!cpu_has_virtual_nmis() &&
|
|
||||||
+ vmx->loaded_vmcs->soft_vnmi_blocked)) {
|
|
||||||
+ if (vmx_interrupt_allowed(vcpu)) {
|
|
||||||
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
|
|
||||||
+ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
|
|
||||||
+ vcpu->arch.nmi_pending) {
|
|
||||||
+ /*
|
|
||||||
+ * This CPU don't support us in finding the end of an
|
|
||||||
+ * NMI-blocked window if the guest runs with IRQs
|
|
||||||
+ * disabled. So we pull the trigger after 1 s of
|
|
||||||
+ * futile waiting, but inform the user about this.
|
|
||||||
+ */
|
|
||||||
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
|
|
||||||
+ "state on VCPU %d after 1 s timeout\n",
|
|
||||||
+ __func__, vcpu->vcpu_id);
|
|
||||||
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
if (exit_reason < kvm_vmx_max_exit_handlers
|
|
||||||
&& kvm_vmx_exit_handlers[exit_reason])
|
|
||||||
return kvm_vmx_exit_handlers[exit_reason](vcpu);
|
|
||||||
@@ -8850,33 +8902,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
|
|
||||||
|
|
||||||
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
|
|
||||||
|
|
||||||
- if (vmx->loaded_vmcs->nmi_known_unmasked)
|
|
||||||
- return;
|
|
||||||
- /*
|
|
||||||
- * Can't use vmx->exit_intr_info since we're not sure what
|
|
||||||
- * the exit reason is.
|
|
||||||
- */
|
|
||||||
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
|
||||||
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
|
|
||||||
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
|
|
||||||
- /*
|
|
||||||
- * SDM 3: 27.7.1.2 (September 2008)
|
|
||||||
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
|
|
||||||
- * a guest IRET fault.
|
|
||||||
- * SDM 3: 23.2.2 (September 2008)
|
|
||||||
- * Bit 12 is undefined in any of the following cases:
|
|
||||||
- * If the VM exit sets the valid bit in the IDT-vectoring
|
|
||||||
- * information field.
|
|
||||||
- * If the VM exit is due to a double fault.
|
|
||||||
- */
|
|
||||||
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
|
|
||||||
- vector != DF_VECTOR && !idtv_info_valid)
|
|
||||||
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
||||||
- GUEST_INTR_STATE_NMI);
|
|
||||||
- else
|
|
||||||
- vmx->loaded_vmcs->nmi_known_unmasked =
|
|
||||||
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
|
|
||||||
- & GUEST_INTR_STATE_NMI);
|
|
||||||
+ if (cpu_has_virtual_nmis()) {
|
|
||||||
+ if (vmx->loaded_vmcs->nmi_known_unmasked)
|
|
||||||
+ return;
|
|
||||||
+ /*
|
|
||||||
+ * Can't use vmx->exit_intr_info since we're not sure what
|
|
||||||
+ * the exit reason is.
|
|
||||||
+ */
|
|
||||||
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
|
||||||
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
|
|
||||||
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
|
|
||||||
+ /*
|
|
||||||
+ * SDM 3: 27.7.1.2 (September 2008)
|
|
||||||
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
|
|
||||||
+ * a guest IRET fault.
|
|
||||||
+ * SDM 3: 23.2.2 (September 2008)
|
|
||||||
+ * Bit 12 is undefined in any of the following cases:
|
|
||||||
+ * If the VM exit sets the valid bit in the IDT-vectoring
|
|
||||||
+ * information field.
|
|
||||||
+ * If the VM exit is due to a double fault.
|
|
||||||
+ */
|
|
||||||
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
|
|
||||||
+ vector != DF_VECTOR && !idtv_info_valid)
|
|
||||||
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
||||||
+ GUEST_INTR_STATE_NMI);
|
|
||||||
+ else
|
|
||||||
+ vmx->loaded_vmcs->nmi_known_unmasked =
|
|
||||||
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
|
|
||||||
+ & GUEST_INTR_STATE_NMI);
|
|
||||||
+ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
|
|
||||||
+ vmx->loaded_vmcs->vnmi_blocked_time +=
|
|
||||||
+ ktime_to_ns(ktime_sub(ktime_get(),
|
|
||||||
+ vmx->loaded_vmcs->entry_time));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
|
|
||||||
@@ -8993,6 +9050,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
||||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
||||||
unsigned long debugctlmsr, cr3, cr4;
|
|
||||||
|
|
||||||
+ /* Record the guest's net vcpu time for enforced NMI injections. */
|
|
||||||
+ if (unlikely(!cpu_has_virtual_nmis() &&
|
|
||||||
+ vmx->loaded_vmcs->soft_vnmi_blocked))
|
|
||||||
+ vmx->loaded_vmcs->entry_time = ktime_get();
|
|
||||||
+
|
|
||||||
/* Don't enter VMX if guest state is invalid, let the exit handler
|
|
||||||
start emulation until we arrive back to a valid state */
|
|
||||||
if (vmx->emulation_required)
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,56 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Date: Thu, 26 Oct 2017 09:13:27 +0200
|
|
||||||
Subject: [PATCH] KVM: SVM: obey guest PAT
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
For many years some users of assigned devices have reported worse
|
|
||||||
performance on AMD processors with NPT than on AMD without NPT,
|
|
||||||
Intel or bare metal.
|
|
||||||
|
|
||||||
The reason turned out to be that SVM is discarding the guest PAT
|
|
||||||
setting and uses the default (PA0=PA4=WB, PA1=PA5=WT, PA2=PA6=UC-,
|
|
||||||
PA3=UC). The guest might be using a different setting, and
|
|
||||||
especially might want write combining but isn't getting it
|
|
||||||
(instead getting slow UC or UC- accesses).
|
|
||||||
|
|
||||||
Thanks a lot to geoff@hostfission.com for noticing the relation
|
|
||||||
to the g_pat setting. The patch has been tested also by a bunch
|
|
||||||
of people on VFIO users forums.
|
|
||||||
|
|
||||||
Fixes: 709ddebf81cb40e3c36c6109a7892e8b93a09464
|
|
||||||
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196409
|
|
||||||
Cc: stable@vger.kernel.org
|
|
||||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
|
||||||
Tested-by: Nick Sarnie <commendsarnex@gmail.com>
|
|
||||||
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
|
|
||||||
(cherry picked from commit 15038e14724799b8c205beb5f20f9e54896013c3)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
arch/x86/kvm/svm.c | 7 +++++++
|
|
||||||
1 file changed, 7 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
|
|
||||||
index a8c911fcd73f..e9d0f80fd83a 100644
|
|
||||||
--- a/arch/x86/kvm/svm.c
|
|
||||||
+++ b/arch/x86/kvm/svm.c
|
|
||||||
@@ -3650,6 +3650,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
|
||||||
u32 ecx = msr->index;
|
|
||||||
u64 data = msr->data;
|
|
||||||
switch (ecx) {
|
|
||||||
+ case MSR_IA32_CR_PAT:
|
|
||||||
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
|
|
||||||
+ return 1;
|
|
||||||
+ vcpu->arch.pat = data;
|
|
||||||
+ svm->vmcb->save.g_pat = data;
|
|
||||||
+ mark_dirty(svm->vmcb, VMCB_NPT);
|
|
||||||
+ break;
|
|
||||||
case MSR_IA32_TSC:
|
|
||||||
kvm_write_tsc(vcpu, msr);
|
|
||||||
break;
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,33 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
|
||||||
Date: Fri, 19 Jan 2018 11:12:37 +0100
|
|
||||||
Subject: [PATCH] net: sched: em_nbyte: don't add the data offset twice
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
'ptr' is shifted by the offset and then validated,
|
|
||||||
the memcmp should not add it a second time.
|
|
||||||
|
|
||||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
net/sched/em_nbyte.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
|
|
||||||
index df3110d69585..07c10bac06a0 100644
|
|
||||||
--- a/net/sched/em_nbyte.c
|
|
||||||
+++ b/net/sched/em_nbyte.c
|
|
||||||
@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
|
|
||||||
if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
- return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
|
|
||||||
+ return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len);
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct tcf_ematch_ops em_nbyte_ops = {
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
|
||||||
Date: Fri, 19 Jan 2018 11:12:38 +0100
|
|
||||||
Subject: [PATCH] net: sched: fix TCF_LAYER_LINK case in tcf_get_base_ptr
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
TCF_LAYER_LINK and TCF_LAYER_NETWORK returned the same pointer as
|
|
||||||
skb->data points to the network header.
|
|
||||||
Use skb_mac_header instead.
|
|
||||||
|
|
||||||
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
include/net/pkt_cls.h | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
|
|
||||||
index 537d0a0ad4c4..4450961b1554 100644
|
|
||||||
--- a/include/net/pkt_cls.h
|
|
||||||
+++ b/include/net/pkt_cls.h
|
|
||||||
@@ -395,7 +395,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
|
|
||||||
{
|
|
||||||
switch (layer) {
|
|
||||||
case TCF_LAYER_LINK:
|
|
||||||
- return skb->data;
|
|
||||||
+ return skb_mac_header(skb);
|
|
||||||
case TCF_LAYER_NETWORK:
|
|
||||||
return skb_network_header(skb);
|
|
||||||
case TCF_LAYER_TRANSPORT:
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,127 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Alexander Duyck <alexander.h.duyck@intel.com>
|
|
||||||
Date: Wed, 4 Oct 2017 08:44:43 -0700
|
|
||||||
Subject: [PATCH] i40e: Fix memory leak related filter programming status
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
It looks like we weren't correctly placing the pages from buffers that had
|
|
||||||
been used to return a filter programming status back on the ring. As a
|
|
||||||
result they were being overwritten and tracking of the pages was lost.
|
|
||||||
|
|
||||||
This change works to correct that by incorporating part of
|
|
||||||
i40e_put_rx_buffer into the programming status handler code. As a result we
|
|
||||||
should now be correctly placing the pages for those buffers on the
|
|
||||||
re-allocation list instead of letting them stay in place.
|
|
||||||
|
|
||||||
Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status")
|
|
||||||
Reported-by: Anders K. Pedersen <akp@cohaesio.com>
|
|
||||||
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
|
|
||||||
Tested-by: Anders K Pedersen <akp@cohaesio.com>
|
|
||||||
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
|
|
||||||
(cherry picked from commit 2b9478ffc550f17c6cd8c69057234e91150f5972)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 63 ++++++++++++++++-------------
|
|
||||||
1 file changed, 36 insertions(+), 27 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
|
|
||||||
index 2194960d5855..391b1878c24b 100644
|
|
||||||
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
|
|
||||||
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
|
|
||||||
@@ -1042,6 +1042,32 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/**
|
|
||||||
+ * i40e_reuse_rx_page - page flip buffer and store it back on the ring
|
|
||||||
+ * @rx_ring: rx descriptor ring to store buffers on
|
|
||||||
+ * @old_buff: donor buffer to have page reused
|
|
||||||
+ *
|
|
||||||
+ * Synchronizes page for reuse by the adapter
|
|
||||||
+ **/
|
|
||||||
+static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
|
|
||||||
+ struct i40e_rx_buffer *old_buff)
|
|
||||||
+{
|
|
||||||
+ struct i40e_rx_buffer *new_buff;
|
|
||||||
+ u16 nta = rx_ring->next_to_alloc;
|
|
||||||
+
|
|
||||||
+ new_buff = &rx_ring->rx_bi[nta];
|
|
||||||
+
|
|
||||||
+ /* update, and store next to alloc */
|
|
||||||
+ nta++;
|
|
||||||
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
|
|
||||||
+
|
|
||||||
+ /* transfer page from old buffer to new buffer */
|
|
||||||
+ new_buff->dma = old_buff->dma;
|
|
||||||
+ new_buff->page = old_buff->page;
|
|
||||||
+ new_buff->page_offset = old_buff->page_offset;
|
|
||||||
+ new_buff->pagecnt_bias = old_buff->pagecnt_bias;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/**
|
|
||||||
* i40e_rx_is_programming_status - check for programming status descriptor
|
|
||||||
* @qw: qword representing status_error_len in CPU ordering
|
|
||||||
@@ -1076,15 +1102,24 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
|
|
||||||
union i40e_rx_desc *rx_desc,
|
|
||||||
u64 qw)
|
|
||||||
{
|
|
||||||
- u32 ntc = rx_ring->next_to_clean + 1;
|
|
||||||
+ struct i40e_rx_buffer *rx_buffer;
|
|
||||||
+ u32 ntc = rx_ring->next_to_clean;
|
|
||||||
u8 id;
|
|
||||||
|
|
||||||
/* fetch, update, and store next to clean */
|
|
||||||
+ rx_buffer = &rx_ring->rx_bi[ntc++];
|
|
||||||
ntc = (ntc < rx_ring->count) ? ntc : 0;
|
|
||||||
rx_ring->next_to_clean = ntc;
|
|
||||||
|
|
||||||
prefetch(I40E_RX_DESC(rx_ring, ntc));
|
|
||||||
|
|
||||||
+ /* place unused page back on the ring */
|
|
||||||
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
|
|
||||||
+ rx_ring->rx_stats.page_reuse_count++;
|
|
||||||
+
|
|
||||||
+ /* clear contents of buffer_info */
|
|
||||||
+ rx_buffer->page = NULL;
|
|
||||||
+
|
|
||||||
id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
|
|
||||||
I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
|
|
||||||
|
|
||||||
@@ -1643,32 +1678,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/**
|
|
||||||
- * i40e_reuse_rx_page - page flip buffer and store it back on the ring
|
|
||||||
- * @rx_ring: rx descriptor ring to store buffers on
|
|
||||||
- * @old_buff: donor buffer to have page reused
|
|
||||||
- *
|
|
||||||
- * Synchronizes page for reuse by the adapter
|
|
||||||
- **/
|
|
||||||
-static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
|
|
||||||
- struct i40e_rx_buffer *old_buff)
|
|
||||||
-{
|
|
||||||
- struct i40e_rx_buffer *new_buff;
|
|
||||||
- u16 nta = rx_ring->next_to_alloc;
|
|
||||||
-
|
|
||||||
- new_buff = &rx_ring->rx_bi[nta];
|
|
||||||
-
|
|
||||||
- /* update, and store next to alloc */
|
|
||||||
- nta++;
|
|
||||||
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
|
|
||||||
-
|
|
||||||
- /* transfer page from old buffer to new buffer */
|
|
||||||
- new_buff->dma = old_buff->dma;
|
|
||||||
- new_buff->page = old_buff->page;
|
|
||||||
- new_buff->page_offset = old_buff->page_offset;
|
|
||||||
- new_buff->pagecnt_bias = old_buff->pagecnt_bias;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
/**
|
|
||||||
* i40e_page_is_reusable - check if any reuse is possible
|
|
||||||
* @page: page struct to check
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,49 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Andrew Honig <ahonig@google.com>
|
|
||||||
Date: Wed, 10 Jan 2018 10:12:03 -0800
|
|
||||||
Subject: [PATCH] KVM: x86: Add memory barrier on vmcs field lookup
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream.
|
|
||||||
|
|
||||||
This adds a memory barrier when performing a lookup into
|
|
||||||
the vmcs_field_to_offset_table. This is related to
|
|
||||||
CVE-2017-5753.
|
|
||||||
|
|
||||||
Signed-off-by: Andrew Honig <ahonig@google.com>
|
|
||||||
Reviewed-by: Jim Mattson <jmattson@google.com>
|
|
||||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
arch/x86/kvm/vmx.c | 12 ++++++++++--
|
|
||||||
1 file changed, 10 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
|
|
||||||
index 146caacd8fdd..80732f87cac0 100644
|
|
||||||
--- a/arch/x86/kvm/vmx.c
|
|
||||||
+++ b/arch/x86/kvm/vmx.c
|
|
||||||
@@ -883,8 +883,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
|
|
||||||
{
|
|
||||||
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
|
|
||||||
|
|
||||||
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
|
|
||||||
- vmcs_field_to_offset_table[field] == 0)
|
|
||||||
+ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
|
|
||||||
+ return -ENOENT;
|
|
||||||
+
|
|
||||||
+ /*
|
|
||||||
+ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
|
|
||||||
+ * generic mechanism.
|
|
||||||
+ */
|
|
||||||
+ asm("lfence");
|
|
||||||
+
|
|
||||||
+ if (vmcs_field_to_offset_table[field] == 0)
|
|
||||||
return -ENOENT;
|
|
||||||
|
|
||||||
return vmcs_field_to_offset_table[field];
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,102 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
|
|
||||||
Date: Wed, 13 Sep 2017 18:42:14 +0800
|
|
||||||
Subject: [PATCH] EDAC, sb_edac: Don't create a second memory controller if HA1
|
|
||||||
is not present
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Yi Zhang reported the following failure on a 2-socket Haswell (E5-2603v3)
|
|
||||||
server (DELL PowerEdge 730xd):
|
|
||||||
|
|
||||||
EDAC sbridge: Some needed devices are missing
|
|
||||||
EDAC MC: Removed device 0 for sb_edac.c Haswell SrcID#0_Ha#0: DEV 0000:7f:12.0
|
|
||||||
EDAC MC: Removed device 1 for sb_edac.c Haswell SrcID#1_Ha#0: DEV 0000:ff:12.0
|
|
||||||
EDAC sbridge: Couldn't find mci handler
|
|
||||||
EDAC sbridge: Couldn't find mci handler
|
|
||||||
EDAC sbridge: Failed to register device with error -19.
|
|
||||||
|
|
||||||
The refactored sb_edac driver creates the IMC1 (the 2nd memory
|
|
||||||
controller) if any IMC1 device is present. In this case only
|
|
||||||
HA1_TA of IMC1 was present, but the driver expected to find
|
|
||||||
HA1/HA1_TM/HA1_TAD[0-3] devices too, leading to the above failure.
|
|
||||||
|
|
||||||
The document [1] says the 'E5-2603 v3' CPU has 4 memory channels max. Yi
|
|
||||||
Zhang inserted one DIMM per channel for each CPU, and did random error
|
|
||||||
address injection test with this patch:
|
|
||||||
|
|
||||||
4024 addresses fell in TOLM hole area
|
|
||||||
12715 addresses fell in CPU_SrcID#0_Ha#0_Chan#0_DIMM#0
|
|
||||||
12774 addresses fell in CPU_SrcID#0_Ha#0_Chan#1_DIMM#0
|
|
||||||
12798 addresses fell in CPU_SrcID#0_Ha#0_Chan#2_DIMM#0
|
|
||||||
12913 addresses fell in CPU_SrcID#0_Ha#0_Chan#3_DIMM#0
|
|
||||||
12674 addresses fell in CPU_SrcID#1_Ha#0_Chan#0_DIMM#0
|
|
||||||
12686 addresses fell in CPU_SrcID#1_Ha#0_Chan#1_DIMM#0
|
|
||||||
12882 addresses fell in CPU_SrcID#1_Ha#0_Chan#2_DIMM#0
|
|
||||||
12934 addresses fell in CPU_SrcID#1_Ha#0_Chan#3_DIMM#0
|
|
||||||
106400 addresses were injected totally.
|
|
||||||
|
|
||||||
The test result shows that all the 4 channels belong to IMC0 per CPU, so
|
|
||||||
the server really only has one IMC per CPU.
|
|
||||||
|
|
||||||
In the 1st page of chapter 2 in datasheet [2], it also says 'E5-2600 v3'
|
|
||||||
implements either one or two IMCs. For CPUs with one IMC, IMC1 is not
|
|
||||||
used and should be ignored.
|
|
||||||
|
|
||||||
Thus, do not create a second memory controller if the key HA1 is absent.
|
|
||||||
|
|
||||||
[1] http://ark.intel.com/products/83349/Intel-Xeon-Processor-E5-2603-v3-15M-Cache-1_60-GHz
|
|
||||||
[2] https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf
|
|
||||||
|
|
||||||
Reported-and-tested-by: Yi Zhang <yizhan@redhat.com>
|
|
||||||
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
|
|
||||||
Cc: Tony Luck <tony.luck@intel.com>
|
|
||||||
Cc: linux-edac <linux-edac@vger.kernel.org>
|
|
||||||
Fixes: e2f747b1f42a ("EDAC, sb_edac: Assign EDAC memory controller per h/w controller")
|
|
||||||
Link: http://lkml.kernel.org/r/20170913104214.7325-1-qiuxu.zhuo@intel.com
|
|
||||||
[ Massage commit message. ]
|
|
||||||
Signed-off-by: Borislav Petkov <bp@suse.de>
|
|
||||||
(cherry picked from commit 15cc3ae001873845b5d842e212478a6570c7d938)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/edac/sb_edac.c | 9 ++++++++-
|
|
||||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
|
|
||||||
index 80d860cb0746..7a3b201d51df 100644
|
|
||||||
--- a/drivers/edac/sb_edac.c
|
|
||||||
+++ b/drivers/edac/sb_edac.c
|
|
||||||
@@ -455,6 +455,7 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
|
|
||||||
static const struct pci_id_descr pci_dev_descr_ibridge[] = {
|
|
||||||
/* Processor Home Agent */
|
|
||||||
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0, IMC0) },
|
|
||||||
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1, IMC1) },
|
|
||||||
|
|
||||||
/* Memory controller */
|
|
||||||
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0, IMC0) },
|
|
||||||
@@ -465,7 +466,6 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = {
|
|
||||||
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0, IMC0) },
|
|
||||||
|
|
||||||
/* Optional, mode 2HA */
|
|
||||||
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1, IMC1) },
|
|
||||||
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1, IMC1) },
|
|
||||||
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1, IMC1) },
|
|
||||||
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1, IMC1) },
|
|
||||||
@@ -2260,6 +2260,13 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
|
|
||||||
next_imc:
|
|
||||||
sbridge_dev = get_sbridge_dev(bus, dev_descr->dom, multi_bus, sbridge_dev);
|
|
||||||
if (!sbridge_dev) {
|
|
||||||
+ /* If the HA1 wasn't found, don't create EDAC second memory controller */
|
|
||||||
+ if (dev_descr->dom == IMC1 && devno != 1) {
|
|
||||||
+ edac_dbg(0, "Skip IMC1: %04x:%04x (since HA1 was absent)\n",
|
|
||||||
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
|
|
||||||
+ pci_dev_put(pdev);
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
if (dev_descr->dom == SOCK)
|
|
||||||
goto out_imc;
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,37 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
|
|
||||||
Date: Mon, 16 Oct 2017 12:40:29 -0500
|
|
||||||
Subject: [PATCH] EDAC, sb_edac: Fix missing break in switch
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Add missing break statement in order to prevent the code from falling
|
|
||||||
through.
|
|
||||||
|
|
||||||
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
|
|
||||||
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
|
|
||||||
Cc: linux-edac <linux-edac@vger.kernel.org>
|
|
||||||
Link: http://lkml.kernel.org/r/20171016174029.GA19757@embeddedor.com
|
|
||||||
Signed-off-by: Borislav Petkov <bp@suse.de>
|
|
||||||
(cherry picked from commit a8e9b186f153a44690ad0363a56716e7077ad28c)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/edac/sb_edac.c | 1 +
|
|
||||||
1 file changed, 1 insertion(+)
|
|
||||||
|
|
||||||
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
|
|
||||||
index 7a3b201d51df..fb0264ef83a3 100644
|
|
||||||
--- a/drivers/edac/sb_edac.c
|
|
||||||
+++ b/drivers/edac/sb_edac.c
|
|
||||||
@@ -2467,6 +2467,7 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
|
|
||||||
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
|
|
||||||
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA:
|
|
||||||
pvt->pci_ta = pdev;
|
|
||||||
+ break;
|
|
||||||
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS:
|
|
||||||
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS:
|
|
||||||
pvt->pci_ras = pdev;
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Dick Kennedy <dick.kennedy@broadcom.com>
|
|
||||||
Date: Wed, 23 Aug 2017 16:55:31 -0700
|
|
||||||
Subject: [PATCH] scsi: lpfc: Fix loop mode target discovery
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
The driver does not discover targets when in loop mode.
|
|
||||||
|
|
||||||
The NLP type is correctly getting set when a fabric connection is
|
|
||||||
detected, but not for loop. The unknown NLP type means that the driver
|
|
||||||
does not issue a PRLI when in loop topology. Thus target discovery
|
|
||||||
fails.
|
|
||||||
|
|
||||||
Fix by checking the topology during discovery. If it is loop, set the
|
|
||||||
NLP FC4 type to FCP.
|
|
||||||
|
|
||||||
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
|
|
||||||
Signed-off-by: James Smart <james.smart@broadcom.com>
|
|
||||||
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
|
|
||||||
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
|
|
||||||
(cherry picked from commit 2877cbffb79ed121a6bcc5edbe629d3aba36cd29)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
drivers/scsi/lpfc/lpfc_nportdisc.c | 3 +++
|
|
||||||
1 file changed, 3 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
|
|
||||||
index f74cb0142fd4..95b2b43ac37d 100644
|
|
||||||
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
|
|
||||||
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
|
|
||||||
@@ -1724,6 +1724,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
|
|
||||||
lpfc_nvme_update_localport(vport);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ } else if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
|
|
||||||
+ ndlp->nlp_fc4_type |= NLP_FC4_FCP;
|
|
||||||
+
|
|
||||||
} else if (ndlp->nlp_fc4_type == 0) {
|
|
||||||
rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID,
|
|
||||||
0, ndlp->nlp_DID);
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,52 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Omar Sandoval <osandov@fb.com>
|
|
||||||
Date: Tue, 5 Dec 2017 23:15:31 -0800
|
|
||||||
Subject: [PATCH] sched/wait: Fix add_wait_queue() behavioral change
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
The following cleanup commit:
|
|
||||||
|
|
||||||
50816c48997a ("sched/wait: Standardize internal naming of wait-queue entries")
|
|
||||||
|
|
||||||
... unintentionally changed the behavior of add_wait_queue() from
|
|
||||||
inserting the wait entry at the head of the wait queue to the tail
|
|
||||||
of the wait queue.
|
|
||||||
|
|
||||||
Beyond a negative performance impact this change in behavior
|
|
||||||
theoretically also breaks wait queues which mix exclusive and
|
|
||||||
non-exclusive waiters, as non-exclusive waiters will not be
|
|
||||||
woken up if they are queued behind enough exclusive waiters.
|
|
||||||
|
|
||||||
Signed-off-by: Omar Sandoval <osandov@fb.com>
|
|
||||||
Reviewed-by: Jens Axboe <axboe@kernel.dk>
|
|
||||||
Acked-by: Peter Zijlstra <peterz@infradead.org>
|
|
||||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
|
||||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
|
||||||
Cc: kernel-team@fb.com
|
|
||||||
Fixes: 50816c48997a ("sched/wait: Standardize internal naming of wait-queue entries")
|
|
||||||
Link: http://lkml.kernel.org/r/a16c8ccffd39bd08fdaa45a5192294c784b803a7.1512544324.git.osandov@fb.com
|
|
||||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
|
||||||
(cherry picked from commit c6b9d9a33029014446bd9ed84c1688f6d3d4eab9)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
kernel/sched/wait.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
|
|
||||||
index d6afed6d0752..c09ebe92a40a 100644
|
|
||||||
--- a/kernel/sched/wait.c
|
|
||||||
+++ b/kernel/sched/wait.c
|
|
||||||
@@ -27,7 +27,7 @@ void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
|
|
||||||
|
|
||||||
wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
|
|
||||||
spin_lock_irqsave(&wq_head->lock, flags);
|
|
||||||
- __add_wait_queue_entry_tail(wq_head, wq_entry);
|
|
||||||
+ __add_wait_queue(wq_head, wq_entry);
|
|
||||||
spin_unlock_irqrestore(&wq_head->lock, flags);
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(add_wait_queue);
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,164 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Andi Kleen <ak@linux.intel.com>
|
|
||||||
Date: Thu, 25 Jan 2018 15:50:28 -0800
|
|
||||||
Subject: [PATCH] module/retpoline: Warn about missing retpoline in module
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
There's a risk that a kernel which has full retpoline mitigations becomes
|
|
||||||
vulnerable when a module gets loaded that hasn't been compiled with the
|
|
||||||
right compiler or the right option.
|
|
||||||
|
|
||||||
To enable detection of that mismatch at module load time, add a module info
|
|
||||||
string "retpoline" at build time when the module was compiled with
|
|
||||||
retpoline support. This only covers compiled C source, but assembler source
|
|
||||||
or prebuilt object files are not checked.
|
|
||||||
|
|
||||||
If a retpoline enabled kernel detects a non retpoline protected module at
|
|
||||||
load time, print a warning and report it in the sysfs vulnerability file.
|
|
||||||
|
|
||||||
[ tglx: Massaged changelog ]
|
|
||||||
|
|
||||||
Signed-off-by: Andi Kleen <ak@linux.intel.com>
|
|
||||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
|
||||||
Cc: David Woodhouse <dwmw2@infradead.org>
|
|
||||||
Cc: gregkh@linuxfoundation.org
|
|
||||||
Cc: torvalds@linux-foundation.org
|
|
||||||
Cc: jeyu@kernel.org
|
|
||||||
Cc: arjan@linux.intel.com
|
|
||||||
Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org
|
|
||||||
(backported from commit caf7501a1b4ec964190f31f9c3f163de252273b8)
|
|
||||||
Conflicts:
|
|
||||||
arch/x86/kernel/cpu/bugs.c
|
|
||||||
context changes
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
include/linux/module.h | 9 +++++++++
|
|
||||||
arch/x86/kernel/cpu/bugs.c | 19 +++++++++++++++++--
|
|
||||||
kernel/module.c | 11 +++++++++++
|
|
||||||
scripts/mod/modpost.c | 9 +++++++++
|
|
||||||
4 files changed, 46 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/include/linux/module.h b/include/linux/module.h
|
|
||||||
index e7bdd549e527..c4fdf7661f82 100644
|
|
||||||
--- a/include/linux/module.h
|
|
||||||
+++ b/include/linux/module.h
|
|
||||||
@@ -794,6 +794,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
|
|
||||||
static inline void module_bug_cleanup(struct module *mod) {}
|
|
||||||
#endif /* CONFIG_GENERIC_BUG */
|
|
||||||
|
|
||||||
+#ifdef RETPOLINE
|
|
||||||
+extern bool retpoline_module_ok(bool has_retpoline);
|
|
||||||
+#else
|
|
||||||
+static inline bool retpoline_module_ok(bool has_retpoline)
|
|
||||||
+{
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
#ifdef CONFIG_MODULE_SIG
|
|
||||||
static inline bool module_sig_ok(struct module *module)
|
|
||||||
{
|
|
||||||
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
|
|
||||||
index d5bafcdb4891..e623bd731a74 100644
|
|
||||||
--- a/arch/x86/kernel/cpu/bugs.c
|
|
||||||
+++ b/arch/x86/kernel/cpu/bugs.c
|
|
||||||
@@ -11,6 +11,7 @@
|
|
||||||
#include <linux/utsname.h>
|
|
||||||
#include <linux/cpu.h>
|
|
||||||
#include <linux/smp.h>
|
|
||||||
+#include <linux/module.h>
|
|
||||||
|
|
||||||
#include <asm/nospec-branch.h>
|
|
||||||
#include <asm/cmdline.h>
|
|
||||||
@@ -93,6 +94,19 @@ static const char *spectre_v2_strings[] = {
|
|
||||||
#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
|
|
||||||
|
|
||||||
static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
|
|
||||||
+static bool spectre_v2_bad_module;
|
|
||||||
+
|
|
||||||
+#ifdef RETPOLINE
|
|
||||||
+bool retpoline_module_ok(bool has_retpoline)
|
|
||||||
+{
|
|
||||||
+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
|
|
||||||
+ return true;
|
|
||||||
+
|
|
||||||
+ pr_err("System may be vunerable to spectre v2\n");
|
|
||||||
+ spectre_v2_bad_module = true;
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
|
|
||||||
static void __init spec2_print_if_insecure(const char *reason)
|
|
||||||
{
|
|
||||||
@@ -299,7 +313,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
|
|
||||||
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
|
|
||||||
return sprintf(buf, "Not affected\n");
|
|
||||||
|
|
||||||
- return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled],
|
|
||||||
- ibpb_inuse ? ", IBPB (Intel v4)" : "");
|
|
||||||
+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
|
|
||||||
+ ibpb_inuse ? ", IBPB (Intel v4)" : "",
|
|
||||||
+ spectre_v2_bad_module ? " - vulnerable module loaded" : "");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
diff --git a/kernel/module.c b/kernel/module.c
|
|
||||||
index e5b878b26906..de7db074f793 100644
|
|
||||||
--- a/kernel/module.c
|
|
||||||
+++ b/kernel/module.c
|
|
||||||
@@ -2855,6 +2855,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_LIVEPATCH */
|
|
||||||
|
|
||||||
+static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
|
|
||||||
+{
|
|
||||||
+ if (retpoline_module_ok(get_modinfo(info, "retpoline")))
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ pr_warn("%s: loading module not compiled with retpoline compiler.\n",
|
|
||||||
+ mod->name);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Sets info->hdr and info->len. */
|
|
||||||
static int copy_module_from_user(const void __user *umod, unsigned long len,
|
|
||||||
struct load_info *info)
|
|
||||||
@@ -3021,6 +3030,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
|
|
||||||
add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ check_modinfo_retpoline(mod, info);
|
|
||||||
+
|
|
||||||
if (get_modinfo(info, "staging")) {
|
|
||||||
add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
|
|
||||||
pr_warn("%s: module is from the staging directory, the quality "
|
|
||||||
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
|
|
||||||
index 48397feb08fb..cc91f81ac33e 100644
|
|
||||||
--- a/scripts/mod/modpost.c
|
|
||||||
+++ b/scripts/mod/modpost.c
|
|
||||||
@@ -2147,6 +2147,14 @@ static void add_intree_flag(struct buffer *b, int is_intree)
|
|
||||||
buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Cannot check for assembler */
|
|
||||||
+static void add_retpoline(struct buffer *b)
|
|
||||||
+{
|
|
||||||
+ buf_printf(b, "\n#ifdef RETPOLINE\n");
|
|
||||||
+ buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
|
|
||||||
+ buf_printf(b, "#endif\n");
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static void add_staging_flag(struct buffer *b, const char *name)
|
|
||||||
{
|
|
||||||
static const char *staging_dir = "drivers/staging";
|
|
||||||
@@ -2492,6 +2500,7 @@ int main(int argc, char **argv)
|
|
||||||
|
|
||||||
add_header(&buf, mod);
|
|
||||||
add_intree_flag(&buf, !external_module);
|
|
||||||
+ add_retpoline(&buf);
|
|
||||||
add_staging_flag(&buf, mod->name);
|
|
||||||
err |= add_versions(&buf, mod);
|
|
||||||
add_depends(&buf, mod, modules);
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,127 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Dan Streetman <ddstreet@ieee.org>
|
|
||||||
Date: Thu, 18 Jan 2018 16:14:26 -0500
|
|
||||||
Subject: [PATCH] net: tcp: close sock if net namespace is exiting
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
When a tcp socket is closed, if it detects that its net namespace is
|
|
||||||
exiting, close immediately and do not wait for FIN sequence.
|
|
||||||
|
|
||||||
For normal sockets, a reference is taken to their net namespace, so it will
|
|
||||||
never exit while the socket is open. However, kernel sockets do not take a
|
|
||||||
reference to their net namespace, so it may begin exiting while the kernel
|
|
||||||
socket is still open. In this case if the kernel socket is a tcp socket,
|
|
||||||
it will stay open trying to complete its close sequence. The sock's dst(s)
|
|
||||||
hold a reference to their interface, which are all transferred to the
|
|
||||||
namespace's loopback interface when the real interfaces are taken down.
|
|
||||||
When the namespace tries to take down its loopback interface, it hangs
|
|
||||||
waiting for all references to the loopback interface to release, which
|
|
||||||
results in messages like:
|
|
||||||
|
|
||||||
unregister_netdevice: waiting for lo to become free. Usage count = 1
|
|
||||||
|
|
||||||
These messages continue until the socket finally times out and closes.
|
|
||||||
Since the net namespace cleanup holds the net_mutex while calling its
|
|
||||||
registered pernet callbacks, any new net namespace initialization is
|
|
||||||
blocked until the current net namespace finishes exiting.
|
|
||||||
|
|
||||||
After this change, the tcp socket notices the exiting net namespace, and
|
|
||||||
closes immediately, releasing its dst(s) and their reference to the
|
|
||||||
loopback interface, which lets the net namespace continue exiting.
|
|
||||||
|
|
||||||
Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407
|
|
||||||
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811
|
|
||||||
Signed-off-by: Dan Streetman <ddstreet@canonical.com>
|
|
||||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
include/net/net_namespace.h | 10 ++++++++++
|
|
||||||
net/ipv4/tcp.c | 3 +++
|
|
||||||
net/ipv4/tcp_timer.c | 15 +++++++++++++++
|
|
||||||
3 files changed, 28 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
|
|
||||||
index 1c401bd4c2e0..a5d023fa78db 100644
|
|
||||||
--- a/include/net/net_namespace.h
|
|
||||||
+++ b/include/net/net_namespace.h
|
|
||||||
@@ -221,6 +221,11 @@ int net_eq(const struct net *net1, const struct net *net2)
|
|
||||||
return net1 == net2;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static inline int check_net(const struct net *net)
|
|
||||||
+{
|
|
||||||
+ return atomic_read(&net->count) != 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
void net_drop_ns(void *);
|
|
||||||
|
|
||||||
#else
|
|
||||||
@@ -245,6 +250,11 @@ int net_eq(const struct net *net1, const struct net *net2)
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static inline int check_net(const struct net *net)
|
|
||||||
+{
|
|
||||||
+ return 1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
#define net_drop_ns NULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
|
|
||||||
index a3e91b552edc..fd2a086da910 100644
|
|
||||||
--- a/net/ipv4/tcp.c
|
|
||||||
+++ b/net/ipv4/tcp.c
|
|
||||||
@@ -2258,6 +2258,9 @@ void tcp_close(struct sock *sk, long timeout)
|
|
||||||
tcp_send_active_reset(sk, GFP_ATOMIC);
|
|
||||||
__NET_INC_STATS(sock_net(sk),
|
|
||||||
LINUX_MIB_TCPABORTONMEMORY);
|
|
||||||
+ } else if (!check_net(sock_net(sk))) {
|
|
||||||
+ /* Not possible to send reset; just close */
|
|
||||||
+ tcp_set_state(sk, TCP_CLOSE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
|
|
||||||
index e906014890b6..ec1e5de41653 100644
|
|
||||||
--- a/net/ipv4/tcp_timer.c
|
|
||||||
+++ b/net/ipv4/tcp_timer.c
|
|
||||||
@@ -50,11 +50,19 @@ static void tcp_write_err(struct sock *sk)
|
|
||||||
* to prevent DoS attacks. It is called when a retransmission timeout
|
|
||||||
* or zero probe timeout occurs on orphaned socket.
|
|
||||||
*
|
|
||||||
+ * Also close if our net namespace is exiting; in that case there is no
|
|
||||||
+ * hope of ever communicating again since all netns interfaces are already
|
|
||||||
+ * down (or about to be down), and we need to release our dst references,
|
|
||||||
+ * which have been moved to the netns loopback interface, so the namespace
|
|
||||||
+ * can finish exiting. This condition is only possible if we are a kernel
|
|
||||||
+ * socket, as those do not hold references to the namespace.
|
|
||||||
+ *
|
|
||||||
* Criteria is still not confirmed experimentally and may change.
|
|
||||||
* We kill the socket, if:
|
|
||||||
* 1. If number of orphaned sockets exceeds an administratively configured
|
|
||||||
* limit.
|
|
||||||
* 2. If we have strong memory pressure.
|
|
||||||
+ * 3. If our net namespace is exiting.
|
|
||||||
*/
|
|
||||||
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
|
|
||||||
{
|
|
||||||
@@ -83,6 +91,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
|
|
||||||
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ if (!check_net(sock_net(sk))) {
|
|
||||||
+ /* Not possible to send reset; just close */
|
|
||||||
+ tcp_done(sk);
|
|
||||||
+ return 1;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Vasily Averin <vvs@virtuozzo.com>
|
|
||||||
Date: Thu, 2 Nov 2017 13:03:42 +0300
|
|
||||||
Subject: [PATCH] lockd: lost rollback of set_grace_period() in
|
|
||||||
lockd_down_net()
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
Commit efda760fe95ea ("lockd: fix lockd shutdown race") is incorrect,
|
|
||||||
it removes lockd_manager and disarms grace_period_end for init_net only.
|
|
||||||
|
|
||||||
If nfsd was started from another net namespace lockd_up_net() calls
|
|
||||||
set_grace_period() that adds lockd_manager into per-netns list
|
|
||||||
and queues grace_period_end delayed work.
|
|
||||||
|
|
||||||
These actions should be reverted in lockd_down_net().
|
|
||||||
Otherwise it can lead to a double list_add after nfsd is restarted in the netns,
|
|
||||||
and to a use-after-free if the non-disarmed delayed work is executed after the netns is destroyed.
|
|
||||||
|
|
||||||
Fixes: efda760fe95e ("lockd: fix lockd shutdown race")
|
|
||||||
Cc: stable@vger.kernel.org
|
|
||||||
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
|
|
||||||
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
|
|
||||||
(cherry picked from commit 3a2b19d1ee5633f76ae8a88da7bc039a5d1732aa)
|
|
||||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
||||||
---
|
|
||||||
fs/lockd/svc.c | 2 ++
|
|
||||||
1 file changed, 2 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
|
|
||||||
index 726b6cecf430..fa8f6effcf00 100644
|
|
||||||
--- a/fs/lockd/svc.c
|
|
||||||
+++ b/fs/lockd/svc.c
|
|
||||||
@@ -274,6 +274,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
|
|
||||||
if (ln->nlmsvc_users) {
|
|
||||||
if (--ln->nlmsvc_users == 0) {
|
|
||||||
nlm_shutdown_hosts_net(net);
|
|
||||||
+ cancel_delayed_work_sync(&ln->grace_period_end);
|
|
||||||
+ locks_end_grace(&ln->lockd_manager);
|
|
||||||
svc_shutdown_net(serv, net);
|
|
||||||
dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net);
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.14.2
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user