diff --git a/patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch b/patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch new file mode 100644 index 0000000..83a64ce --- /dev/null +++ b/patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch @@ -0,0 +1,164 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 27 Sep 2023 17:19:52 -0700 +Subject: [PATCH] x86/fpu: Allow caller to constrain xfeatures when copying to + uabi buffer + +Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can +constrain which xfeatures are saved into the userspace buffer without +having to modify the user_xfeatures field in KVM's guest_fpu state. + +KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to +guest must not show up in the effective xstate_bv field of the buffer. +Saving only the guest-supported xfeatures allows userspace to load the +saved state on a different host with a fewer xfeatures, so long as the +target host supports the xfeatures that are exposed to the guest. + +KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to +the set of guest-supported xfeatures, but doing so broke KVM's historical +ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that +are supported by the *host*. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20230928001956.924301-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 18164f66e6c59fda15c198b371fa008431efdb22) +Signed-off-by: Thomas Lamprecht +--- + arch/x86/include/asm/fpu/api.h | 3 ++- + arch/x86/kernel/fpu/core.c | 5 +++-- + arch/x86/kernel/fpu/xstate.c | 7 +++++-- + arch/x86/kernel/fpu/xstate.h | 3 ++- + arch/x86/kvm/x86.c | 21 +++++++++------------ + 5 files changed, 21 insertions(+), 18 deletions(-) + +diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h +index b475d9a582b8..e829fa4c6788 100644 +--- a/arch/x86/include/asm/fpu/api.h ++++ b/arch/x86/include/asm/fpu/api.h +@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { + static inline void fpu_sync_guest_vmexit_xfd_state(void) { } + #endif + +-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); ++extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, ++ unsigned int size, u64 xfeatures, u32 pkru); + extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); + + static inline void fpstate_set_confidential(struct fpu_guest *gfpu) +diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c +index caf33486dc5e..cddd5018e6a4 100644 +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) + EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); + + void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, +- unsigned int size, u32 pkru) ++ unsigned int size, u64 xfeatures, u32 pkru) + { + struct fpstate *kstate = gfpu->fpstate; + union fpregs_state *ustate = buf; + struct membuf mb = { .p = buf, .left = size }; + + if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { +- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE); ++ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru, ++ XSTATE_COPY_XSAVE); + } else { + memcpy(&ustate->fxsave, &kstate->regs.fxsave, + sizeof(ustate->fxsave)); +diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c +index 1afbc4866b10..463ec0cd0dab 100644 +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, + * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer + * @to: membuf descriptor + * @fpstate: The fpstate buffer from which to copy ++ * @xfeatures: The mask of xfeatures to save (XSAVE mode only) + * @pkru_val: The PKRU value to store in the PKRU component + * @copy_mode: The requested copy mode + * +@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, + * It supports partial copy but @to.pos always starts from zero. + */ + void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, +- u32 pkru_val, enum xstate_copy_mode copy_mode) ++ u64 xfeatures, u32 pkru_val, ++ enum xstate_copy_mode copy_mode) + { + const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); + struct xregs_state *xinit = &init_fpstate.regs.xsave; +@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, + break; + + case XSTATE_COPY_XSAVE: +- header.xfeatures &= fpstate->user_xfeatures; ++ header.xfeatures &= fpstate->user_xfeatures & xfeatures; + break; + } + +@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode copy_mode) + { + __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, ++ tsk->thread.fpu.fpstate->user_xfeatures, + tsk->thread.pkru, copy_mode); + } + +diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h +index a4ecb04d8d64..3518fb26d06b 100644 +--- a/arch/x86/kernel/fpu/xstate.h ++++ b/arch/x86/kernel/fpu/xstate.h +@@ -43,7 +43,8 @@ enum xstate_copy_mode { + + struct membuf; + extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, +- u32 pkru_val, enum xstate_copy_mode copy_mode); ++ u64 xfeatures, u32 pkru_val, ++ enum xstate_copy_mode copy_mode); + extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode mode); + extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index ff92ff41d5ce..a43a950d04cb 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5314,26 +5314,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, + return 0; + } + +-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, +- struct kvm_xsave *guest_xsave) ++ ++static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, ++ u8 *state, unsigned int size) + { + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) + return; + +- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, +- guest_xsave->region, +- sizeof(guest_xsave->region), ++ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, ++ vcpu->arch.guest_fpu.fpstate->user_xfeatures, + vcpu->arch.pkru); + } + +-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, +- u8 *state, unsigned int size) ++static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, ++ struct kvm_xsave *guest_xsave) + { +- if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) +- return; +- +- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, +- state, size, vcpu->arch.pkru); ++ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, ++ sizeof(guest_xsave->region)); + } + + static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, diff --git a/patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch b/patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch new file mode 100644 index 0000000..9154817 --- /dev/null +++ b/patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch @@ -0,0 +1,119 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 27 Sep 2023 17:19:53 -0700 +Subject: [PATCH] KVM: x86: Constrain guest-supported xfeatures only at + KVM_GET_XSAVE{2} + +Mask off xfeatures that aren't exposed to the guest only when saving guest +state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly. +Preserving the maximal set of xfeatures in user_xfeatures restores KVM's +ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu: +Limit guest user_xfeatures to supported bits of XCR0") allowed userspace +to load xfeatures that are supported by the host, irrespective of what +xfeatures are exposed to the guest. + +There is no known use case where userspace *intentionally* loads xfeatures +that aren't exposed to the guest, but the bug fixed by commit ad856280ddea +was specifically that KVM_GET_SAVE{2} would save xfeatures that weren't +exposed to the guest, e.g. would lead to userspace unintentionally loading +guest-unsupported xfeatures when live migrating a VM. + +Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially +problematic for QEMU-based setups, as QEMU has a bug where instead of +terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops +loading guest state, i.e. resumes the guest after live migration with +incomplete guest state, and ultimately results in guest data corruption. + +Note, letting userspace restore all host-supported xfeatures does not fix +setups where a VM is migrated from a host *without* commit ad856280ddea, +to a target with a subset of host-supported xfeatures. However there is +no way to safely address that scenario, e.g. KVM could silently drop the +unsupported features, but that would be a clear violation of KVM's ABI and +so would require userspace to opt-in, at which point userspace could +simply be updated to sanitize the to-be-loaded XSAVE state. + +Reported-by: Tyler Stachecki +Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net +Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") +Cc: stable@vger.kernel.org +Cc: Leonardo Bras +Signed-off-by: Sean Christopherson +Acked-by: Dave Hansen +Message-Id: <20230928001956.924301-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 8647c52e9504c99752a39f1d44f6268f82c40a5c) +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kernel/fpu/xstate.c | 5 +---- + arch/x86/kvm/cpuid.c | 8 -------- + arch/x86/kvm/x86.c | 18 ++++++++++++++++-- + 3 files changed, 17 insertions(+), 14 deletions(-) + +diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c +index 463ec0cd0dab..ebe698f8af73 100644 +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, + fpregs_restore_userregs(); + + newfps->xfeatures = curfps->xfeatures | xfeatures; +- +- if (!guest_fpu) +- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; +- ++ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + newfps->xfd = curfps->xfd & ~xfeatures; + + /* Do the final updates within the locked region */ +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 61aefeb3fdbc..e5393ee652ba 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -350,14 +350,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) + vcpu->arch.guest_supported_xcr0 = + cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); + +- /* +- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if +- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't +- * supported by the host. +- */ +- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 | +- XFEATURE_MASK_FPSSE; +- + kvm_update_pv_runtime(vcpu); + + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index a43a950d04cb..a4a44adf7c72 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5318,12 +5318,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, + static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) + { ++ /* ++ * Only copy state for features that are enabled for the guest. The ++ * state itself isn't problematic, but setting bits in the header for ++ * features that are supported in *this* host but not exposed to the ++ * guest can result in KVM_SET_XSAVE failing when live migrating to a ++ * compatible host without the features that are NOT exposed to the ++ * guest. ++ * ++ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if ++ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't ++ * supported by the host. ++ */ ++ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 | ++ XFEATURE_MASK_FPSSE; ++ + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) + return; + + fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, +- vcpu->arch.guest_fpu.fpstate->user_xfeatures, +- vcpu->arch.pkru); ++ supported_xcr0, vcpu->arch.pkru); + } + + static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,