cherry-pick 6.5.11 stable release

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Thomas Lamprecht 2023-11-12 16:38:11 +01:00
parent 3fd758c529
commit 4a4ddffc89
14 changed files with 12 additions and 543 deletions

View File

@@ -75,7 +75,7 @@ index 9f2bcb8b7f96..a60a4220be95 100644
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index 92302d5749d8..71387b9aca33 100644
+index 139df46ed306..08450329f1a3 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -287,6 +287,106 @@ static int __init pci_apply_final_quirks(void)
@@ -185,7 +185,7 @@ index 92302d5749d8..71387b9aca33 100644
/*
* Decoding should be disabled for a PCI device during BAR sizing to avoid
* conflict. But doing so may cause problems on host bridge and perhaps other
-@@ -5069,6 +5169,8 @@ static const struct pci_dev_acs_enabled {
+@@ -5071,6 +5171,8 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs },
/* APM X-Gene */
{ PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },

View File

@@ -14,10 +14,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c
-index 69a3e544676c..56a45b9b602e 100644
+index fe8c46c46505..db9ce84f2006 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
-@@ -10269,7 +10269,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
+@@ -10298,7 +10298,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
if (time_after(jiffies, warning_time +
READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
list_for_each_entry(dev, list, todo_list) {

View File

@@ -78,7 +78,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
3 files changed, 21 insertions(+)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
-index d3432687c9e6..2c20da9aa2ac 100644
+index 7bdc66abfc92..e2b67975869c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -249,6 +249,12 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
@@ -108,10 +108,10 @@ index b1658c0de847..12a02851ff57 100644
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index c381770bcbf1..6690a3722007 100644
+index 7bcf1a76a6ab..aa225f430299 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
-@@ -5413,6 +5413,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+@@ -5424,6 +5424,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;

View File

@@ -11,7 +11,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index a377f8e0a414..3be334d34317 100644
+index 656b2150643e..06fefd2a4bfa 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -298,6 +298,7 @@ static int dmar_map_gfx = 1;

View File

@@ -23,7 +23,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 15 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
-index 96936ddf1b3c..92db000409a9 100644
+index 36482780a42f..43481f26a34b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -247,18 +247,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)

View File

@@ -1,42 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 13 Sep 2023 08:26:47 +0300
Subject: [PATCH] net: thunderbolt: Fix TCPv6 GSO checksum calculation
Alex reported that running ssh over IPv6 does not work with the
Thunderbolt/USB4 networking driver. The reason for that is that the driver
should call skb_is_gso() before calling skb_is_gso_v6(), and it should
not return false after calculating the checksum successfully. This was
probably a copy-paste error from the original driver, where it was done properly.
Reported-by: Alex Balcanquall <alex@alexbal.com>
Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable")
Cc: stable@vger.kernel.org
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/net/thunderbolt/main.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/net/thunderbolt/main.c b/drivers/net/thunderbolt/main.c
index 0c1e8970ee58..0a53ec293d04 100644
--- a/drivers/net/thunderbolt/main.c
+++ b/drivers/net/thunderbolt/main.c
@@ -1049,12 +1049,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
*tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, 0,
ip_hdr(skb)->protocol, 0);
- } else if (skb_is_gso_v6(skb)) {
+ } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
*tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, 0,
IPPROTO_TCP, 0);
- return false;
} else if (protocol == htons(ETH_P_IPV6)) {
tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
*tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
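
For reference, the fix above hinges on what the two skb helpers actually test.
Roughly, paraphrasing include/linux/skbuff.h (exact definitions may differ
between kernel versions):

	/* true for any GSO skb (non-zero gso_size) */
	static inline bool skb_is_gso(const struct sk_buff *skb)
	{
		return skb_shinfo(skb)->gso_size;
	}

	/* only meaningful for GSO skbs: tests the TCPv6 GSO type bit */
	static inline bool skb_is_gso_v6(const struct sk_buff *skb)
	{
		return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
	}

Hence the combined skb_is_gso(skb) && skb_is_gso_v6(skb) check, and the removal
of the stray "return false" so that a successfully computed checksum is no
longer reported as a failure.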

View File

@@ -24,10 +24,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index 2ec76ab525ea..ef3215286428 100644
+index c8466bc64b87..6154eda73d41 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
-@@ -4969,6 +4969,7 @@ static __init void svm_set_cpu_caps(void)
+@@ -4983,6 +4983,7 @@ static __init void svm_set_cpu_caps(void)
if (nested) {
kvm_cpu_cap_set(X86_FEATURE_SVM);
kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);

View File

@@ -1,134 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 7 Sep 2023 16:02:30 +0300
Subject: [PATCH] thunderbolt: Restart XDomain discovery handshake after
failure
Alex reported that after rebooting the other host, the peer-to-peer link
does not come up anymore. The reason for this is that the host that was
not rebooted tries to send the UUID request only 10 times, per the
USB4 Inter-Domain spec, and gives up if it does not get a reply. Then,
when the other side is actually ready, it cannot get the link established
anymore. The USB4 Inter-Domain spec requires that the discovery protocol
be restarted in that case, so implement this now.
Reported-by: Alex Balcanquall <alex@alexbal.com>
Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding")
Cc: stable@vger.kernel.org
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/thunderbolt/xdomain.c | 58 +++++++++++++++++++++++++----------
1 file changed, 41 insertions(+), 17 deletions(-)
diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
index 5b5566862318..9803f0bbf20d 100644
--- a/drivers/thunderbolt/xdomain.c
+++ b/drivers/thunderbolt/xdomain.c
@@ -703,6 +703,27 @@ static void update_property_block(struct tb_xdomain *xd)
mutex_unlock(&xdomain_lock);
}
+static void start_handshake(struct tb_xdomain *xd)
+{
+ xd->state = XDOMAIN_STATE_INIT;
+ queue_delayed_work(xd->tb->wq, &xd->state_work,
+ msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+}
+
+/* Can be called from state_work */
+static void __stop_handshake(struct tb_xdomain *xd)
+{
+ cancel_delayed_work_sync(&xd->properties_changed_work);
+ xd->properties_changed_retries = 0;
+ xd->state_retries = 0;
+}
+
+static void stop_handshake(struct tb_xdomain *xd)
+{
+ cancel_delayed_work_sync(&xd->state_work);
+ __stop_handshake(xd);
+}
+
static void tb_xdp_handle_request(struct work_struct *work)
{
struct xdomain_request_work *xw = container_of(work, typeof(*xw), work);
@@ -765,6 +786,15 @@ static void tb_xdp_handle_request(struct work_struct *work)
case UUID_REQUEST:
tb_dbg(tb, "%llx: received XDomain UUID request\n", route);
ret = tb_xdp_uuid_response(ctl, route, sequence, uuid);
+ /*
+ * If we've stopped the discovery with an error such as
+ * timing out, we will restart the handshake now that we
+ * received UUID request from the remote host.
+ */
+ if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) {
+ dev_dbg(&xd->dev, "restarting handshake\n");
+ start_handshake(xd);
+ }
break;
case LINK_STATE_STATUS_REQUEST:
@@ -1521,6 +1551,13 @@ static void tb_xdomain_queue_properties_changed(struct tb_xdomain *xd)
msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
}
+static void tb_xdomain_failed(struct tb_xdomain *xd)
+{
+ xd->state = XDOMAIN_STATE_ERROR;
+ queue_delayed_work(xd->tb->wq, &xd->state_work,
+ msecs_to_jiffies(XDOMAIN_DEFAULT_TIMEOUT));
+}
+
static void tb_xdomain_state_work(struct work_struct *work)
{
struct tb_xdomain *xd = container_of(work, typeof(*xd), state_work.work);
@@ -1547,7 +1584,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
if (ret) {
if (ret == -EAGAIN)
goto retry_state;
- xd->state = XDOMAIN_STATE_ERROR;
+ tb_xdomain_failed(xd);
} else {
tb_xdomain_queue_properties_changed(xd);
if (xd->bonding_possible)
@@ -1612,7 +1649,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
if (ret) {
if (ret == -EAGAIN)
goto retry_state;
- xd->state = XDOMAIN_STATE_ERROR;
+ tb_xdomain_failed(xd);
} else {
xd->state = XDOMAIN_STATE_ENUMERATED;
}
@@ -1623,6 +1660,8 @@ static void tb_xdomain_state_work(struct work_struct *work)
break;
case XDOMAIN_STATE_ERROR:
+ dev_dbg(&xd->dev, "discovery failed, stopping handshake\n");
+ __stop_handshake(xd);
break;
default:
@@ -1833,21 +1872,6 @@ static void tb_xdomain_release(struct device *dev)
kfree(xd);
}
-static void start_handshake(struct tb_xdomain *xd)
-{
- xd->state = XDOMAIN_STATE_INIT;
- queue_delayed_work(xd->tb->wq, &xd->state_work,
- msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
-}
-
-static void stop_handshake(struct tb_xdomain *xd)
-{
- cancel_delayed_work_sync(&xd->properties_changed_work);
- cancel_delayed_work_sync(&xd->state_work);
- xd->properties_changed_retries = 0;
- xd->state_retries = 0;
-}
-
static int __maybe_unused tb_xdomain_suspend(struct device *dev)
{
stop_handshake(tb_to_xdomain(dev));
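
The new error-recovery flow, summarized as a sketch from the hunks above (not
verbatim driver code):

	/*
	 * start_handshake()              -> state = XDOMAIN_STATE_INIT,
	 *                                   queue state_work
	 * state_work fails (not -EAGAIN) -> tb_xdomain_failed():
	 *                                   state = XDOMAIN_STATE_ERROR,
	 *                                   queue state_work once more
	 * state_work in ERROR state      -> __stop_handshake(): cancel retries
	 * remote sends UUID_REQUEST      -> tb_xdp_handle_request() sees
	 *                                   XDOMAIN_STATE_ERROR and calls
	 *                                   start_handshake() to restart
	 *                                   discovery
	 */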

View File

@@ -1,72 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Sat, 7 Oct 2023 12:57:02 +0200
Subject: [PATCH] x86/cpu: Fix AMD erratum #1485 on Zen4-based CPUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Fix erratum #1485 on Zen4 parts where running with STIBP disabled can
cause an #UD exception. The performance impact of the fix is negligible.
Reported-by: René Rebe <rene@exactcode.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: René Rebe <rene@exactcode.de>
Cc: <stable@kernel.org>
Link: https://lore.kernel.org/r/D99589F4-BC5D-430B-87B2-72C20370CF57@exactcode.com
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/include/asm/msr-index.h | 9 +++++++--
arch/x86/kernel/cpu/amd.c | 8 ++++++++
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1d111350197f..b37abb55e948 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -637,12 +637,17 @@
/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT 0xc000010e
-/* Fam 17h MSRs */
-#define MSR_F17H_IRPERF 0xc00000e9
+/* Zen4 */
+#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+/* Zen 2 */
#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7eca6a8abbb1..981bc23665a3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -80,6 +80,10 @@ static const int amd_div0[] =
AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+static const int amd_erratum_1485[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
+ AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
+
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
int osvw_id = *erratum++;
@@ -1140,6 +1144,10 @@ static void init_amd(struct cpuinfo_x86 *c)
pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
setup_force_cpu_bug(X86_BUG_DIV0);
}
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ cpu_has_amd_erratum(c, amd_erratum_1485))
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
}
#ifdef CONFIG_X86_32
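
How the new erratum entry is meant to be read (a sketch; the AMD_MODEL_RANGE
encoding itself lives in arch/x86/kernel/cpu/amd.c):

	/*
	 * amd_erratum_1485 matches, at any stepping:
	 *   - family 0x19, models 0x10-0x1f
	 *   - family 0x19, models 0x60-0xaf
	 * On bare metal (X86_FEATURE_HYPERVISOR clear) and a matching CPU,
	 * init_amd() sets MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT (bit 5) in
	 * MSR_ZEN4_BP_CFG (0xc001102e) via msr_set_bit().
	 */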

View File

@@ -1,164 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 27 Sep 2023 17:19:52 -0700
Subject: [PATCH] x86/fpu: Allow caller to constrain xfeatures when copying to
uabi buffer
Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can
constrain which xfeatures are saved into the userspace buffer without
having to modify the user_xfeatures field in KVM's guest_fpu state.
KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to
the guest must not show up in the effective xstate_bv field of the buffer.
Saving only the guest-supported xfeatures allows userspace to load the
saved state on a different host with fewer xfeatures, so long as the
target host supports the xfeatures that are exposed to the guest.
KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to
the set of guest-supported xfeatures, but doing so broke KVM's historical
ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that
are supported by the *host*.
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230928001956.924301-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 18164f66e6c59fda15c198b371fa008431efdb22)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/include/asm/fpu/api.h | 3 ++-
arch/x86/kernel/fpu/core.c | 5 +++--
arch/x86/kernel/fpu/xstate.c | 7 +++++--
arch/x86/kernel/fpu/xstate.h | 3 ++-
arch/x86/kvm/x86.c | 21 +++++++++------------
5 files changed, 21 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b475d9a582b8..e829fa4c6788 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif
-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
+extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+ unsigned int size, u64 xfeatures, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 98e507cc7d34..b582325b9c37 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
- unsigned int size, u32 pkru)
+ unsigned int size, u64 xfeatures, u32 pkru)
{
struct fpstate *kstate = gfpu->fpstate;
union fpregs_state *ustate = buf;
struct membuf mb = { .p = buf, .left = size };
if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
+ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
+ XSTATE_COPY_XSAVE);
} else {
memcpy(&ustate->fxsave, &kstate->regs.fxsave,
sizeof(ustate->fxsave));
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1afbc4866b10..463ec0cd0dab 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
* @to: membuf descriptor
* @fpstate: The fpstate buffer from which to copy
+ * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
* @pkru_val: The PKRU value to store in the PKRU component
* @copy_mode: The requested copy mode
*
@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* It supports partial copy but @to.pos always starts from zero.
*/
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- u32 pkru_val, enum xstate_copy_mode copy_mode)
+ u64 xfeatures, u32 pkru_val,
+ enum xstate_copy_mode copy_mode)
{
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
break;
case XSTATE_COPY_XSAVE:
- header.xfeatures &= fpstate->user_xfeatures;
+ header.xfeatures &= fpstate->user_xfeatures & xfeatures;
break;
}
@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode copy_mode)
{
__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
+ tsk->thread.fpu.fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index a4ecb04d8d64..3518fb26d06b 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -43,7 +43,8 @@ enum xstate_copy_mode {
struct membuf;
extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- u32 pkru_val, enum xstate_copy_mode copy_mode);
+ u64 xfeatures, u32 pkru_val,
+ enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6690a3722007..394d3a8b4682 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5385,26 +5385,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return 0;
}
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
- struct kvm_xsave *guest_xsave)
+
+static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+ u8 *state, unsigned int size)
{
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
- guest_xsave->region,
- sizeof(guest_xsave->region),
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
+ vcpu->arch.guest_fpu.fpstate->user_xfeatures,
vcpu->arch.pkru);
}
-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
- u8 *state, unsigned int size)
+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
{
- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return;
-
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
- state, size, vcpu->arch.pkru);
+ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+ sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
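
The resulting call chain, sketched from the hunks above (dispatch from
kvm_arch_vcpu_ioctl() elided):

	/*
	 * KVM_GET_XSAVE  -> kvm_vcpu_ioctl_x86_get_xsave()
	 *                     -> kvm_vcpu_ioctl_x86_get_xsave2(vcpu,
	 *                            guest_xsave->region, sizeof(region))
	 * KVM_GET_XSAVE2 -> kvm_vcpu_ioctl_x86_get_xsave2(vcpu, state, size)
	 *                     -> fpu_copy_guest_fpstate_to_uabi(..., size,
	 *                            fpstate->user_xfeatures, vcpu->arch.pkru)
	 *                     -> __copy_xstate_to_uabi_buf(mb, kstate,
	 *                            xfeatures, pkru, XSTATE_COPY_XSAVE)
	 * where the buffer header ends up as
	 *     header.xfeatures &= fpstate->user_xfeatures & xfeatures;
	 */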

View File

@@ -1,119 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 27 Sep 2023 17:19:53 -0700
Subject: [PATCH] KVM: x86: Constrain guest-supported xfeatures only at
KVM_GET_XSAVE{2}
Mask off xfeatures that aren't exposed to the guest only when saving guest
state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly.
Preserving the maximal set of xfeatures in user_xfeatures restores KVM's
ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu:
Limit guest user_xfeatures to supported bits of XCR0") allowed userspace
to load xfeatures that are supported by the host, irrespective of what
xfeatures are exposed to the guest.
There is no known use case where userspace *intentionally* loads xfeatures
that aren't exposed to the guest, but the bug fixed by commit ad856280ddea
was specifically that KVM_GET_XSAVE{2} would save xfeatures that weren't
exposed to the guest, e.g. would lead to userspace unintentionally loading
guest-unsupported xfeatures when live migrating a VM.
Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially
problematic for QEMU-based setups, as QEMU has a bug where instead of
terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops
loading guest state, i.e. resumes the guest after live migration with
incomplete guest state, and ultimately results in guest data corruption.
Note, letting userspace restore all host-supported xfeatures does not fix
setups where a VM is migrated from a host *without* commit ad856280ddea,
to a target with a subset of host-supported xfeatures. However there is
no way to safely address that scenario, e.g. KVM could silently drop the
unsupported features, but that would be a clear violation of KVM's ABI and
so would require userspace to opt-in, at which point userspace could
simply be updated to sanitize the to-be-loaded XSAVE state.
Reported-by: Tyler Stachecki <stachecki.tyler@gmail.com>
Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net
Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0")
Cc: stable@vger.kernel.org
Cc: Leonardo Bras <leobras@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Message-Id: <20230928001956.924301-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8647c52e9504c99752a39f1d44f6268f82c40a5c)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kernel/fpu/xstate.c | 5 +----
arch/x86/kvm/cpuid.c | 8 --------
arch/x86/kvm/x86.c | 18 ++++++++++++++++--
3 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 463ec0cd0dab..ebe698f8af73 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
-
- if (!guest_fpu)
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
-
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2c20da9aa2ac..e2b67975869c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -332,14 +332,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
- /*
- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
- * supported by the host.
- */
- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
- XFEATURE_MASK_FPSSE;
-
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 394d3a8b4682..e0cea0f8380a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5389,12 +5389,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
u8 *state, unsigned int size)
{
+ /*
+ * Only copy state for features that are enabled for the guest. The
+ * state itself isn't problematic, but setting bits in the header for
+ * features that are supported in *this* host but not exposed to the
+ * guest can result in KVM_SET_XSAVE failing when live migrating to a
+ * compatible host without the features that are NOT exposed to the
+ * guest.
+ *
+ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
+ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
+ * supported by the host.
+ */
+ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
+ XFEATURE_MASK_FPSSE;
+
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
- vcpu->arch.guest_fpu.fpstate->user_xfeatures,
- vcpu->arch.pkru);
+ supported_xcr0, vcpu->arch.pkru);
}
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
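
Net effect of the two KVM patches, summarized as a sketch (not authoritative):

	/*
	 * KVM_GET_XSAVE{2}: the copied header.xfeatures is masked with
	 *     guest_supported_xcr0 | XFEATURE_MASK_FPSSE, so the saved image
	 *     never advertises xfeatures hidden from the guest.
	 * KVM_SET_XSAVE:    user_xfeatures is no longer shrunk when CPUID is
	 *     set, so userspace may again restore any host-supported
	 *     xfeature, matching the pre-ad856280ddea ABI.
	 */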

@@ -1 +1 @@
-Subproject commit 67cc773d350984cd7fe77d26adb1f4f0fa434ebc
+Subproject commit 6897e6aedfb5c92966fd56014490c3d624c7c469