From f6df304f26cc1231a3109fe1f8f383d5c418e71d Mon Sep 17 00:00:00 2001 From: Thomas Lamprecht Date: Wed, 27 Jul 2022 13:45:07 +0200 Subject: [PATCH] backport "SMM emulation and interrupt shadow fixes" Signed-off-by: Thomas Lamprecht --- ...r-em_sysexit-should-update-ctxt-mode.patch | 31 ++ ...ator-introduce-update_emulation_mode.patch | 158 ++++++++++ ...-emulator-remove-assign_eip_near-far.patch | 127 ++++++++ ...-update-the-emulation-mode-after-rsm.patch | 34 +++ ...-update-the-emulation-mode-after-CR0.patch | 46 +++ ...-smm-add-structs-for-KVM-s-smram-lay.patch | 166 +++++++++++ ...-smm-use-smram-struct-for-32-bit-smr.patch | 268 +++++++++++++++++ ...-smm-use-smram-struct-for-64-bit-smr.patch | 279 ++++++++++++++++++ .../0023-KVM-x86-SVM-use-smram-structs.patch | 111 +++++++ ...-smm-preserve-interrupt-shadow-in-SM.patch | 167 +++++++++++ 10 files changed, 1387 insertions(+) create mode 100644 patches/kernel/0015-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch create mode 100644 patches/kernel/0016-KVM-x86-emulator-introduce-update_emulation_mode.patch create mode 100644 patches/kernel/0017-KVM-x86-emulator-remove-assign_eip_near-far.patch create mode 100644 patches/kernel/0018-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch create mode 100644 patches/kernel/0019-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch create mode 100644 patches/kernel/0020-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch create mode 100644 patches/kernel/0021-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch create mode 100644 patches/kernel/0022-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch create mode 100644 patches/kernel/0023-KVM-x86-SVM-use-smram-structs.patch create mode 100644 patches/kernel/0024-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch diff --git a/patches/kernel/0015-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch 
b/patches/kernel/0015-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch new file mode 100644 index 0000000..732349d --- /dev/null +++ b/patches/kernel/0015-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch @@ -0,0 +1,31 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:08:52 +0300 +Subject: [PATCH] KVM: x86: emulator: em_sysexit should update ctxt->mode + +This is one of the instructions that can change the +processor mode. + +Note that this is likely a benign bug, because the only problematic +mode change is from 32 bit to 64 bit which can lead to truncation of RIP, +and it is not possible to do with sysexit, +since sysexit running in 32 bit mode will be limited to 32 bit version. + +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/emulate.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 318a78379ca6..35b12692739c 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -2862,6 +2862,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); + + ctxt->_eip = rdx; ++ ctxt->mode = usermode; + *reg_write(ctxt, VCPU_REGS_RSP) = rcx; + + return X86EMUL_CONTINUE; diff --git a/patches/kernel/0016-KVM-x86-emulator-introduce-update_emulation_mode.patch b/patches/kernel/0016-KVM-x86-emulator-introduce-update_emulation_mode.patch new file mode 100644 index 0000000..cb54bf1 --- /dev/null +++ b/patches/kernel/0016-KVM-x86-emulator-introduce-update_emulation_mode.patch @@ -0,0 +1,158 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:08:53 +0300 +Subject: [PATCH] KVM: x86: emulator: introduce update_emulation_mode + +Some instructions update the cpu execution mode, which needs +to update the emulation mode. 
+ +Extract this code, and make assign_eip_far use it. + +assign_eip_far now reads CS, instead of getting it via a parameter, +which is ok, because callers always assign CS to the +same value before calling it. + +No functional change is intended. + +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/emulate.c | 85 ++++++++++++++++++++++++++++-------------- + 1 file changed, 57 insertions(+), 28 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 35b12692739c..1b5123a882a1 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -795,8 +795,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, + ctxt->mode, linear); + } + +-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, +- enum x86emul_mode mode) ++static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst) + { + ulong linear; + int rc; +@@ -806,41 +805,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, + + if (ctxt->op_bytes != sizeof(unsigned long)) + addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); +- rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); ++ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->_eip = addr.ea; + return rc; + } + ++static inline int update_emulation_mode(struct x86_emulate_ctxt *ctxt) ++{ ++ u64 efer; ++ struct desc_struct cs; ++ u16 selector; ++ u32 base3; ++ ++ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); ++ ++ if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) { ++ /* Real mode. cpu must not have long mode active */ ++ if (efer & EFER_LMA) ++ return X86EMUL_UNHANDLEABLE; ++ ctxt->mode = X86EMUL_MODE_REAL; ++ return X86EMUL_CONTINUE; ++ } ++ ++ if (ctxt->eflags & X86_EFLAGS_VM) { ++ /* Protected/VM86 mode. 
cpu must not have long mode active */ ++ if (efer & EFER_LMA) ++ return X86EMUL_UNHANDLEABLE; ++ ctxt->mode = X86EMUL_MODE_VM86; ++ return X86EMUL_CONTINUE; ++ } ++ ++ if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS)) ++ return X86EMUL_UNHANDLEABLE; ++ ++ if (efer & EFER_LMA) { ++ if (cs.l) { ++ /* Proper long mode */ ++ ctxt->mode = X86EMUL_MODE_PROT64; ++ } else if (cs.d) { ++ /* 32 bit compatibility mode*/ ++ ctxt->mode = X86EMUL_MODE_PROT32; ++ } else { ++ ctxt->mode = X86EMUL_MODE_PROT16; ++ } ++ } else { ++ /* Legacy 32 bit / 16 bit mode */ ++ ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; ++ } ++ ++ return X86EMUL_CONTINUE; ++} ++ + static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) + { +- return assign_eip(ctxt, dst, ctxt->mode); ++ return assign_eip(ctxt, dst); + } + +-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, +- const struct desc_struct *cs_desc) ++static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst) + { +- enum x86emul_mode mode = ctxt->mode; +- int rc; ++ int rc = update_emulation_mode(ctxt); + +-#ifdef CONFIG_X86_64 +- if (ctxt->mode >= X86EMUL_MODE_PROT16) { +- if (cs_desc->l) { +- u64 efer = 0; ++ if (rc != X86EMUL_CONTINUE) ++ return rc; + +- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); +- if (efer & EFER_LMA) +- mode = X86EMUL_MODE_PROT64; +- } else +- mode = X86EMUL_MODE_PROT32; /* temporary value */ +- } +-#endif +- if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) +- mode = cs_desc->d ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; +- rc = assign_eip(ctxt, dst, mode); +- if (rc == X86EMUL_CONTINUE) +- ctxt->mode = mode; +- return rc; ++ return assign_eip(ctxt, dst); + } + + static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +@@ -2154,7 +2183,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); ++ rc = assign_eip_far(ctxt, ctxt->src.val); + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; +@@ -2235,7 +2264,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) + &new_desc); + if (rc != X86EMUL_CONTINUE) + return rc; +- rc = assign_eip_far(ctxt, eip, &new_desc); ++ rc = assign_eip_far(ctxt, eip); + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; +@@ -3459,7 +3488,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); ++ rc = assign_eip_far(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + goto fail; + diff --git a/patches/kernel/0017-KVM-x86-emulator-remove-assign_eip_near-far.patch b/patches/kernel/0017-KVM-x86-emulator-remove-assign_eip_near-far.patch new file mode 100644 index 0000000..97ba427 --- /dev/null +++ b/patches/kernel/0017-KVM-x86-emulator-remove-assign_eip_near-far.patch @@ -0,0 +1,127 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:08:54 +0300 +Subject: [PATCH] KVM: x86: emulator: remove assign_eip_near/far + +Now the assign_eip_far just updates the emulation mode in addition to +updating the rip, it doesn't make sense to keep that function. + +Move mode update to the callers and remove these functions. + +No functional change is intended. 
+ +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/emulate.c | 47 +++++++++++++++++++++--------------------- + 1 file changed, 24 insertions(+), 23 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 1b5123a882a1..9e305e0cd815 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -857,24 +857,9 @@ static inline int update_emulation_mode(struct x86_emulate_ctxt *ctxt) + return X86EMUL_CONTINUE; + } + +-static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) +-{ +- return assign_eip(ctxt, dst); +-} +- +-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst) +-{ +- int rc = update_emulation_mode(ctxt); +- +- if (rc != X86EMUL_CONTINUE) +- return rc; +- +- return assign_eip(ctxt, dst); +-} +- + static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) + { +- return assign_eip_near(ctxt, ctxt->_eip + rel); ++ return assign_eip(ctxt, ctxt->_eip + rel); + } + + static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear, +@@ -2183,7 +2168,12 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = assign_eip_far(ctxt, ctxt->src.val); ++ rc = update_emulation_mode(ctxt); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ ++ rc = assign_eip(ctxt, ctxt->src.val); ++ + /* Error handling is not implemented. 
*/ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; +@@ -2193,7 +2183,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) + + static int em_jmp_abs(struct x86_emulate_ctxt *ctxt) + { +- return assign_eip_near(ctxt, ctxt->src.val); ++ return assign_eip(ctxt, ctxt->src.val); + } + + static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) +@@ -2202,7 +2192,7 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) + long int old_eip; + + old_eip = ctxt->_eip; +- rc = assign_eip_near(ctxt, ctxt->src.val); ++ rc = assign_eip(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->src.val = old_eip; +@@ -2240,7 +2230,7 @@ static int em_ret(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + +- return assign_eip_near(ctxt, eip); ++ return assign_eip(ctxt, eip); + } + + static int em_ret_far(struct x86_emulate_ctxt *ctxt) +@@ -2264,7 +2254,13 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) + &new_desc); + if (rc != X86EMUL_CONTINUE) + return rc; +- rc = assign_eip_far(ctxt, eip); ++ ++ rc = update_emulation_mode(ctxt); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ ++ rc = assign_eip(ctxt, eip); ++ + /* Error handling is not implemented. 
*/ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; +@@ -3488,7 +3484,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = assign_eip_far(ctxt, ctxt->src.val); ++ rc = update_emulation_mode(ctxt); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ ++ rc = assign_eip(ctxt, ctxt->src.val); ++ + if (rc != X86EMUL_CONTINUE) + goto fail; + +@@ -3521,7 +3522,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; +- rc = assign_eip_near(ctxt, eip); ++ rc = assign_eip(ctxt, eip); + if (rc != X86EMUL_CONTINUE) + return rc; + rsp_increment(ctxt, ctxt->src.val); diff --git a/patches/kernel/0018-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch b/patches/kernel/0018-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch new file mode 100644 index 0000000..b18ccf5 --- /dev/null +++ b/patches/kernel/0018-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch @@ -0,0 +1,34 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:08:55 +0300 +Subject: [PATCH] KVM: x86: emulator: update the emulation mode after rsm + +This ensures that RIP will be correctly written back, +because the RSM instruction can switch the CPU mode from +32 bit (or less) to 64 bit. + +This fixes a guest crash in case the #SMI is received +while the guest runs code from an address > 32 bit. 
+ +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/emulate.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 9e305e0cd815..c582639ea2b4 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -2635,6 +2635,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) + if (ret != X86EMUL_CONTINUE) + goto emulate_shutdown; + ++ ++ ret = update_emulation_mode(ctxt); ++ if (ret != X86EMUL_CONTINUE) ++ goto emulate_shutdown; ++ + /* + * Note, the ctxt->ops callbacks are responsible for handling side + * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID diff --git a/patches/kernel/0019-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch b/patches/kernel/0019-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch new file mode 100644 index 0000000..21c6732 --- /dev/null +++ b/patches/kernel/0019-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch @@ -0,0 +1,46 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:08:56 +0300 +Subject: [PATCH] KVM: x86: emulator: update the emulation mode after CR0 write + +CR0.PE toggles real/protected mode, thus its update +should update the emulation mode. + +This is likely a benign bug because there is no writeback +of state, other than the RIP increment, and when toggling +CR0.PE, the CPU has to execute code from a very low memory address. 
+ +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/emulate.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index c582639ea2b4..38d9bfa650ec 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -3636,11 +3636,22 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) + + static int em_cr_write(struct x86_emulate_ctxt *ctxt) + { +- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) ++ int cr_num = ctxt->modrm_reg; ++ int r; ++ ++ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val)) + return emulate_gp(ctxt, 0); + + /* Disable writeback. */ + ctxt->dst.type = OP_NONE; ++ ++ if (cr_num == 0) { ++ /* CR0 write might have updated CR0.PE */ ++ r = update_emulation_mode(ctxt); ++ if (r != X86EMUL_CONTINUE) ++ return r; ++ } ++ + return X86EMUL_CONTINUE; + } + diff --git a/patches/kernel/0020-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch b/patches/kernel/0020-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch new file mode 100644 index 0000000..e2e51f7 --- /dev/null +++ b/patches/kernel/0020-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch @@ -0,0 +1,166 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:08:58 +0300 +Subject: [PATCH] KVM: x86: emulator/smm: add structs for KVM's smram layout + +Those structs will be used to read/write the smram state image. + +Also document the differences between KVM's SMRAM layout and SMRAM +layout that is used by real Intel/AMD cpus. 
+ +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/kvm_emulate.h | 139 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 139 insertions(+) + +diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h +index fb09cd22cb7f..d16b377be70b 100644 +--- a/arch/x86/kvm/kvm_emulate.h ++++ b/arch/x86/kvm/kvm_emulate.h +@@ -482,6 +482,145 @@ enum x86_intercept { + nr_x86_intercepts + }; + ++ ++/* ++ * 32 bit KVM's emulated SMM layout ++ * Loosely based on Intel's layout ++ */ ++ ++struct kvm_smm_seg_state_32 { ++ u32 flags; ++ u32 limit; ++ u32 base; ++} __packed; ++ ++struct kvm_smram_state_32 { ++ ++ u32 reserved1[62]; /* FE00 - FEF7 */ ++ u32 smbase; /* FEF8 */ ++ u32 smm_revision; /* FEFC */ ++ u32 reserved2[5]; /* FF00-FF13 */ ++ /* CR4 is not present in Intel/AMD SMRAM image*/ ++ u32 cr4; /* FF14 */ ++ u32 reserved3[5]; /* FF18 */ ++ ++ /* ++ * Segment state is not present/documented in the ++ * Intel/AMD SMRAM image ++ */ ++ struct kvm_smm_seg_state_32 ds; /* FF2C */ ++ struct kvm_smm_seg_state_32 fs; /* FF38 */ ++ struct kvm_smm_seg_state_32 gs; /* FF44 */ ++ /* idtr has only base and limit*/ ++ struct kvm_smm_seg_state_32 idtr; /* FF50 */ ++ struct kvm_smm_seg_state_32 tr; /* FF5C */ ++ u32 reserved; /* FF68 */ ++ /* gdtr has only base and limit*/ ++ struct kvm_smm_seg_state_32 gdtr; /* FF6C */ ++ struct kvm_smm_seg_state_32 ldtr; /* FF78 */ ++ struct kvm_smm_seg_state_32 es; /* FF84 */ ++ struct kvm_smm_seg_state_32 cs; /* FF90 */ ++ struct kvm_smm_seg_state_32 ss; /* FF9C */ ++ ++ u32 es_sel; /* FFA8 */ ++ u32 cs_sel; /* FFAC */ ++ u32 ss_sel; /* FFB0 */ ++ u32 ds_sel; /* FFB4 */ ++ u32 fs_sel; /* FFB8 */ ++ u32 gs_sel; /* FFBC */ ++ u32 ldtr_sel; /* FFC0 */ ++ u32 tr_sel; /* FFC4 */ ++ ++ u32 dr7; /* FFC8 */ ++ u32 dr6; /* FFCC */ ++ ++ /* GPRS in the "natural" X86 order (RAX/RCX/RDX.../RDI)*/ ++ u32 gprs[8]; /* FFD0-FFEC */ ++ ++ u32 eip; /* FFF0 */ ++ u32 eflags; /* FFF4 */ ++ u32 cr3; /* FFF8 */ ++ u32 cr0; /* 
FFFC */ ++} __packed; ++ ++/* ++ * 64 bit KVM's emulated SMM layout ++ * Based on AMD64 layout ++ */ ++ ++struct kvm_smm_seg_state_64 { ++ u16 selector; ++ u16 attributes; ++ u32 limit; ++ u64 base; ++}; ++ ++struct kvm_smram_state_64 { ++ struct kvm_smm_seg_state_64 es; /* FE00 (R/O) */ ++ struct kvm_smm_seg_state_64 cs; /* FE10 (R/O) */ ++ struct kvm_smm_seg_state_64 ss; /* FE20 (R/O) */ ++ struct kvm_smm_seg_state_64 ds; /* FE30 (R/O) */ ++ struct kvm_smm_seg_state_64 fs; /* FE40 (R/O) */ ++ struct kvm_smm_seg_state_64 gs; /* FE50 (R/O) */ ++ ++ /* gdtr has only base and limit*/ ++ struct kvm_smm_seg_state_64 gdtr; /* FE60 (R/O) */ ++ struct kvm_smm_seg_state_64 ldtr; /* FE70 (R/O) */ ++ ++ /* idtr has only base and limit*/ ++ struct kvm_smm_seg_state_64 idtr; /* FE80 (R/O) */ ++ struct kvm_smm_seg_state_64 tr; /* FE90 (R/O) */ ++ ++ /* I/O restart and auto halt restart are not implemented by KVM */ ++ u64 io_restart_rip; /* FEA0 (R/O) */ ++ u64 io_restart_rcx; /* FEA8 (R/O) */ ++ u64 io_restart_rsi; /* FEB0 (R/O) */ ++ u64 io_restart_rdi; /* FEB8 (R/O) */ ++ u32 io_restart_dword; /* FEC0 (R/O) */ ++ u32 reserved1; /* FEC4 */ ++ u8 io_instruction_restart; /* FEC8 (R/W) */ ++ u8 auto_halt_restart; /* FEC9 (R/W) */ ++ u8 reserved2[6]; /* FECA-FECF */ ++ ++ u64 efer; /* FED0 (R/O) */ ++ ++ /* ++ * Implemented on AMD only, to store current SVM guest address. ++ * svm_guest_virtual_int has unknown purpose, not implemented. 
++ */ ++ ++ u64 svm_guest_flag; /* FED8 (R/O) */ ++ u64 svm_guest_vmcb_gpa; /* FEE0 (R/O) */ ++ u64 svm_guest_virtual_int; /* FEE8 (R/O) */ ++ ++ u32 reserved3[3]; /* FEF0-FEFB */ ++ u32 smm_revison; /* FEFC (R/O) */ ++ u32 smbase; /* FF00 (R/W) */ ++ u32 reserved4[5]; /* FF04-FF17 */ ++ ++ /* SSP and SVM fields below are not implemented by KVM */ ++ u64 ssp; /* FF18 (R/W) */ ++ u64 svm_guest_pat; /* FF20 (R/O) */ ++ u64 svm_host_efer; /* FF28 (R/O) */ ++ u64 svm_host_cr4; /* FF30 (R/O) */ ++ u64 svm_host_cr3; /* FF38 (R/O) */ ++ u64 svm_host_cr0; /* FF40 (R/O) */ ++ ++ u64 cr4; /* FF48 (R/O) */ ++ u64 cr3; /* FF50 (R/O) */ ++ u64 cr0; /* FF58 (R/O) */ ++ ++ u64 dr7; /* FF60 (R/O) */ ++ u64 dr6; /* FF68 (R/O) */ ++ ++ u64 rflags; /* FF70 (R/W) */ ++ u64 rip; /* FF78 (R/W) */ ++ ++ /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */ ++ u64 gprs[16]; /* FF80-FFFF (R/W) */ ++}; ++ ++ + /* Host execution mode. */ + #if defined(CONFIG_X86_32) + #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 diff --git a/patches/kernel/0021-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch b/patches/kernel/0021-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch new file mode 100644 index 0000000..ba324ae --- /dev/null +++ b/patches/kernel/0021-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch @@ -0,0 +1,268 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:08:59 +0300 +Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 32 bit smram + load/restore + +Use kvm_smram_state_32 struct to save/restore 32 bit SMM state +(used when X86_FEATURE_LM is not present in the guest CPUID). 
+ +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/emulate.c | 81 +++++++++++++++--------------------------- + arch/x86/kvm/x86.c | 75 +++++++++++++++++--------------------- + 2 files changed, 60 insertions(+), 96 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 38d9bfa650ec..44ce8d51f18b 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -2340,25 +2340,17 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) + desc->type = (flags >> 8) & 15; + } + +-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, ++static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, ++ struct kvm_smm_seg_state_32 *state, ++ u16 selector, + int n) + { + struct desc_struct desc; +- int offset; +- u16 selector; +- +- selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); +- +- if (n < 3) +- offset = 0x7f84 + n * 12; +- else +- offset = 0x7f2c + (n - 3) * 12; + +- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); +- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); +- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); ++ set_desc_base(&desc, state->base); ++ set_desc_limit(&desc, state->limit); ++ rsm_set_desc_flags(&desc, state->flags); + ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); +- return X86EMUL_CONTINUE; + } + + #ifdef CONFIG_X86_64 +@@ -2429,63 +2421,46 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, + } + + static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, +- const char *smstate) ++ struct kvm_smram_state_32 *smstate) + { +- struct desc_struct desc; + struct desc_ptr dt; +- u16 selector; +- u32 val, cr0, cr3, cr4; + int i; + +- cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); +- cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); +- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; +- ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); ++ ctxt->eflags = smstate->eflags | 
X86_EFLAGS_FIXED; ++ ctxt->_eip = smstate->eip; + + for (i = 0; i < 8; i++) +- *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); +- +- val = GET_SMSTATE(u32, smstate, 0x7fcc); ++ *reg_write(ctxt, i) = smstate->gprs[i]; + +- if (ctxt->ops->set_dr(ctxt, 6, val)) ++ if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6)) + return X86EMUL_UNHANDLEABLE; +- +- val = GET_SMSTATE(u32, smstate, 0x7fc8); +- +- if (ctxt->ops->set_dr(ctxt, 7, val)) ++ if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7)) + return X86EMUL_UNHANDLEABLE; + +- selector = GET_SMSTATE(u32, smstate, 0x7fc4); +- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); +- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); +- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); +- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); ++ rsm_load_seg_32(ctxt, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR); ++ rsm_load_seg_32(ctxt, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR); + +- selector = GET_SMSTATE(u32, smstate, 0x7fc0); +- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80)); +- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); +- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); +- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); + +- dt.address = GET_SMSTATE(u32, smstate, 0x7f74); +- dt.size = GET_SMSTATE(u32, smstate, 0x7f70); ++ dt.address = smstate->gdtr.base; ++ dt.size = smstate->gdtr.limit; + ctxt->ops->set_gdt(ctxt, &dt); + +- dt.address = GET_SMSTATE(u32, smstate, 0x7f58); +- dt.size = GET_SMSTATE(u32, smstate, 0x7f54); ++ dt.address = smstate->idtr.base; ++ dt.size = smstate->idtr.limit; + ctxt->ops->set_idt(ctxt, &dt); + +- for (i = 0; i < 6; i++) { +- int r = rsm_load_seg_32(ctxt, smstate, i); +- if (r != X86EMUL_CONTINUE) +- return r; +- } ++ rsm_load_seg_32(ctxt, &smstate->es, smstate->es_sel, VCPU_SREG_ES); ++ rsm_load_seg_32(ctxt, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS); ++ rsm_load_seg_32(ctxt, &smstate->ss, 
smstate->ss_sel, VCPU_SREG_SS); + +- cr4 = GET_SMSTATE(u32, smstate, 0x7f14); ++ rsm_load_seg_32(ctxt, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS); ++ rsm_load_seg_32(ctxt, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS); ++ rsm_load_seg_32(ctxt, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS); + +- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); ++ ctxt->ops->set_smbase(ctxt, smstate->smbase); + +- return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); ++ return rsm_enter_protected_mode(ctxt, smstate->cr0, ++ smstate->cr3, smstate->cr4); + } + + #ifdef CONFIG_X86_64 +@@ -2630,7 +2605,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) + ret = rsm_load_state_64(ctxt, buf); + else + #endif +- ret = rsm_load_state_32(ctxt, buf); ++ ret = rsm_load_state_32(ctxt, (struct kvm_smram_state_32 *)buf); + + if (ret != X86EMUL_CONTINUE) + goto emulate_shutdown; +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 922fc258c37f..b626eb3fa376 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9018,22 +9018,18 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) + return flags; + } + +-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) ++static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, ++ struct kvm_smm_seg_state_32 *state, ++ u32 *selector, ++ int n) + { + struct kvm_segment seg; +- int offset; + + kvm_get_segment(vcpu, &seg, n); +- put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector); +- +- if (n < 3) +- offset = 0x7f84 + n * 12; +- else +- offset = 0x7f2c + (n - 3) * 12; +- +- put_smstate(u32, buf, offset + 8, seg.base); +- put_smstate(u32, buf, offset + 4, seg.limit); +- put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg)); ++ *selector = seg.selector; ++ state->base = seg.base; ++ state->limit = seg.limit; ++ state->flags = enter_smm_get_segment_flags(&seg); + } + + #ifdef CONFIG_X86_64 +@@ -9054,54 +9050,47 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) + 
} + #endif + +-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) ++static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_state_32 *smram) + { + struct desc_ptr dt; +- struct kvm_segment seg; + unsigned long val; + int i; + +- put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); +- put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); +- put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); +- put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); ++ smram->cr0 = kvm_read_cr0(vcpu); ++ smram->cr3 = kvm_read_cr3(vcpu); ++ smram->eflags = kvm_get_rflags(vcpu); ++ smram->eip = kvm_rip_read(vcpu); + + for (i = 0; i < 8; i++) +- put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i)); ++ smram->gprs[i] = kvm_register_read_raw(vcpu, i); + + kvm_get_dr(vcpu, 6, &val); +- put_smstate(u32, buf, 0x7fcc, (u32)val); ++ smram->dr6 = (u32)val; + kvm_get_dr(vcpu, 7, &val); +- put_smstate(u32, buf, 0x7fc8, (u32)val); ++ smram->dr7 = (u32)val; + +- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); +- put_smstate(u32, buf, 0x7fc4, seg.selector); +- put_smstate(u32, buf, 0x7f64, seg.base); +- put_smstate(u32, buf, 0x7f60, seg.limit); +- put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); +- +- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); +- put_smstate(u32, buf, 0x7fc0, seg.selector); +- put_smstate(u32, buf, 0x7f80, seg.base); +- put_smstate(u32, buf, 0x7f7c, seg.limit); +- put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); ++ enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR); ++ enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR); + + static_call(kvm_x86_get_gdt)(vcpu, &dt); +- put_smstate(u32, buf, 0x7f74, dt.address); +- put_smstate(u32, buf, 0x7f70, dt.size); ++ smram->gdtr.base = dt.address; ++ smram->gdtr.limit = dt.size; + + static_call(kvm_x86_get_idt)(vcpu, &dt); +- put_smstate(u32, buf, 0x7f58, dt.address); +- put_smstate(u32, buf, 0x7f54, dt.size); ++ 
smram->idtr.base = dt.address; ++ smram->idtr.limit = dt.size; + +- for (i = 0; i < 6; i++) +- enter_smm_save_seg_32(vcpu, buf, i); ++ enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES); ++ enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS); ++ enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS); + +- put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); ++ enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS); ++ enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS); ++ enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS); + +- /* revision id */ +- put_smstate(u32, buf, 0x7efc, 0x00020000); +- put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); ++ smram->cr4 = kvm_read_cr4(vcpu); ++ smram->smm_revision = 0x00020000; ++ smram->smbase = vcpu->arch.smbase; + } + + #ifdef CONFIG_X86_64 +@@ -9172,7 +9161,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) + enter_smm_save_state_64(vcpu, buf); + else + #endif +- enter_smm_save_state_32(vcpu, buf); ++ enter_smm_save_state_32(vcpu, (struct kvm_smram_state_32 *)buf); + + /* + * Give enter_smm() a chance to make ISA-specific changes to the vCPU diff --git a/patches/kernel/0022-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch b/patches/kernel/0022-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch new file mode 100644 index 0000000..ce4a011 --- /dev/null +++ b/patches/kernel/0022-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch @@ -0,0 +1,279 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:09:00 +0300 +Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 64 bit smram + load/restore + +Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state +(used when X86_FEATURE_LM is present in the guest CPUID, +regardless of 32-bitness of the guest). 
+ +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/emulate.c | 88 ++++++++++++++---------------------------- + arch/x86/kvm/x86.c | 75 ++++++++++++++++------------------- + 2 files changed, 62 insertions(+), 101 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 44ce8d51f18b..98c2cf169b39 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -2354,24 +2354,16 @@ static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, + } + + #ifdef CONFIG_X86_64 +-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, +- int n) ++static void rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, ++ struct kvm_smm_seg_state_64 *state, ++ int n) + { + struct desc_struct desc; +- int offset; +- u16 selector; +- u32 base3; +- +- offset = 0x7e00 + n * 16; +- +- selector = GET_SMSTATE(u16, smstate, offset); +- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); +- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); +- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); +- base3 = GET_SMSTATE(u32, smstate, offset + 12); + +- ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); +- return X86EMUL_CONTINUE; ++ rsm_set_desc_flags(&desc, state->attributes << 8); ++ set_desc_limit(&desc, state->limit); ++ set_desc_base(&desc, (u32)state->base); ++ ctxt->ops->set_segment(ctxt, state->selector, &desc, state->base >> 32, n); + } + #endif + +@@ -2465,71 +2457,49 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, + + #ifdef CONFIG_X86_64 + static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, +- const char *smstate) ++ struct kvm_smram_state_64 *smstate) + { +- struct desc_struct desc; + struct desc_ptr dt; +- u64 val, cr0, cr3, cr4; +- u32 base3; +- u16 selector; + int i, r; + + for (i = 0; i < 16; i++) +- *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); ++ *reg_write(ctxt, i) = smstate->gprs[15 - i]; + +- ctxt->_eip = 
GET_SMSTATE(u64, smstate, 0x7f78); +- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; ++ ctxt->_eip = smstate->rip; ++ ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED; + +- val = GET_SMSTATE(u64, smstate, 0x7f68); +- +- if (ctxt->ops->set_dr(ctxt, 6, val)) ++ if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6)) + return X86EMUL_UNHANDLEABLE; +- +- val = GET_SMSTATE(u64, smstate, 0x7f60); +- +- if (ctxt->ops->set_dr(ctxt, 7, val)) ++ if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7)) + return X86EMUL_UNHANDLEABLE; + +- cr0 = GET_SMSTATE(u64, smstate, 0x7f58); +- cr3 = GET_SMSTATE(u64, smstate, 0x7f50); +- cr4 = GET_SMSTATE(u64, smstate, 0x7f48); +- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); +- val = GET_SMSTATE(u64, smstate, 0x7ed0); ++ ctxt->ops->set_smbase(ctxt, smstate->smbase); + +- if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA)) ++ if (ctxt->ops->set_msr(ctxt, MSR_EFER, smstate->efer & ~EFER_LMA)) + return X86EMUL_UNHANDLEABLE; + +- selector = GET_SMSTATE(u32, smstate, 0x7e90); +- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); +- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); +- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98)); +- base3 = GET_SMSTATE(u32, smstate, 0x7e9c); +- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); ++ rsm_load_seg_64(ctxt, &smstate->tr, VCPU_SREG_TR); + +- dt.size = GET_SMSTATE(u32, smstate, 0x7e84); +- dt.address = GET_SMSTATE(u64, smstate, 0x7e88); ++ dt.size = smstate->idtr.limit; ++ dt.address = smstate->idtr.base; + ctxt->ops->set_idt(ctxt, &dt); + +- selector = GET_SMSTATE(u32, smstate, 0x7e70); +- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); +- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); +- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); +- base3 = GET_SMSTATE(u32, smstate, 0x7e7c); +- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); ++ rsm_load_seg_64(ctxt, 
&smstate->ldtr, VCPU_SREG_LDTR); + +- dt.size = GET_SMSTATE(u32, smstate, 0x7e64); +- dt.address = GET_SMSTATE(u64, smstate, 0x7e68); ++ dt.size = smstate->gdtr.limit; ++ dt.address = smstate->gdtr.base; + ctxt->ops->set_gdt(ctxt, &dt); + +- r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); ++ r = rsm_enter_protected_mode(ctxt, smstate->cr0, smstate->cr3, smstate->cr4); + if (r != X86EMUL_CONTINUE) + return r; + +- for (i = 0; i < 6; i++) { +- r = rsm_load_seg_64(ctxt, smstate, i); +- if (r != X86EMUL_CONTINUE) +- return r; +- } ++ rsm_load_seg_64(ctxt, &smstate->es, VCPU_SREG_ES); ++ rsm_load_seg_64(ctxt, &smstate->cs, VCPU_SREG_CS); ++ rsm_load_seg_64(ctxt, &smstate->ss, VCPU_SREG_SS); ++ rsm_load_seg_64(ctxt, &smstate->ds, VCPU_SREG_DS); ++ rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS); ++ rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS); + + return X86EMUL_CONTINUE; + } +@@ -2602,7 +2572,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) + + #ifdef CONFIG_X86_64 + if (emulator_has_longmode(ctxt)) +- ret = rsm_load_state_64(ctxt, buf); ++ ret = rsm_load_state_64(ctxt, (struct kvm_smram_state_64 *)buf); + else + #endif + ret = rsm_load_state_32(ctxt, (struct kvm_smram_state_32 *)buf); +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index b626eb3fa376..f40cd45b6a01 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9033,20 +9033,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, + } + + #ifdef CONFIG_X86_64 +-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) ++static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, ++ struct kvm_smm_seg_state_64 *state, ++ int n) + { + struct kvm_segment seg; +- int offset; +- u16 flags; + + kvm_get_segment(vcpu, &seg, n); +- offset = 0x7e00 + n * 16; +- +- flags = enter_smm_get_segment_flags(&seg) >> 8; +- put_smstate(u16, buf, offset, seg.selector); +- put_smstate(u16, buf, offset + 2, flags); +- put_smstate(u32, buf, offset + 4, seg.limit); +- put_smstate(u64, 
buf, offset + 8, seg.base); ++ state->selector = seg.selector; ++ state->attributes = enter_smm_get_segment_flags(&seg) >> 8; ++ state->limit = seg.limit; ++ state->base = seg.base; + } + #endif + +@@ -9094,57 +9091,51 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat + } + + #ifdef CONFIG_X86_64 +-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) ++static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_state_64 *smram) + { + struct desc_ptr dt; +- struct kvm_segment seg; + unsigned long val; + int i; + + for (i = 0; i < 16; i++) +- put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i)); ++ smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i); ++ ++ smram->rip = kvm_rip_read(vcpu); ++ smram->rflags = kvm_get_rflags(vcpu); + +- put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu)); +- put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); + + kvm_get_dr(vcpu, 6, &val); +- put_smstate(u64, buf, 0x7f68, val); ++ smram->dr6 = val; + kvm_get_dr(vcpu, 7, &val); +- put_smstate(u64, buf, 0x7f60, val); +- +- put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); +- put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); +- put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); ++ smram->dr7 = val; + +- put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase); ++ smram->cr0 = kvm_read_cr0(vcpu); ++ smram->cr3 = kvm_read_cr3(vcpu); ++ smram->cr4 = kvm_read_cr4(vcpu); + +- /* revision id */ +- put_smstate(u32, buf, 0x7efc, 0x00020064); ++ smram->smbase = vcpu->arch.smbase; ++ smram->smm_revison = 0x00020064; + +- put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer); ++ smram->efer = vcpu->arch.efer; + +- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); +- put_smstate(u16, buf, 0x7e90, seg.selector); +- put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); +- put_smstate(u32, buf, 0x7e94, seg.limit); +- put_smstate(u64, buf, 0x7e98, seg.base); ++ enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR); + + 
static_call(kvm_x86_get_idt)(vcpu, &dt); +- put_smstate(u32, buf, 0x7e84, dt.size); +- put_smstate(u64, buf, 0x7e88, dt.address); ++ smram->idtr.limit = dt.size; ++ smram->idtr.base = dt.address; + +- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); +- put_smstate(u16, buf, 0x7e70, seg.selector); +- put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); +- put_smstate(u32, buf, 0x7e74, seg.limit); +- put_smstate(u64, buf, 0x7e78, seg.base); ++ enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR); + + static_call(kvm_x86_get_gdt)(vcpu, &dt); +- put_smstate(u32, buf, 0x7e64, dt.size); +- put_smstate(u64, buf, 0x7e68, dt.address); ++ smram->gdtr.limit = dt.size; ++ smram->gdtr.base = dt.address; + +- for (i = 0; i < 6; i++) +- enter_smm_save_seg_64(vcpu, buf, i); ++ enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES); ++ enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS); ++ enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS); ++ enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS); ++ enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS); ++ enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS); + } + #endif + +@@ -9158,7 +9149,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) + memset(buf, 0, 512); + #ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) +- enter_smm_save_state_64(vcpu, buf); ++ enter_smm_save_state_64(vcpu, (struct kvm_smram_state_64 *)buf); + else + #endif + enter_smm_save_state_32(vcpu, (struct kvm_smram_state_32 *)buf); diff --git a/patches/kernel/0023-KVM-x86-SVM-use-smram-structs.patch b/patches/kernel/0023-KVM-x86-SVM-use-smram-structs.patch new file mode 100644 index 0000000..891b4a2 --- /dev/null +++ b/patches/kernel/0023-KVM-x86-SVM-use-smram-structs.patch @@ -0,0 +1,111 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:09:01 +0300 +Subject: [PATCH] KVM: x86: SVM: use smram structs + +This removes the last user of 
put_smstate/GET_SMSTATE so +remove these functions as well. + +Also add a sanity check that we don't attempt to enter the SMM +on non long mode capable guest CPU with a running nested guest. + +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/include/asm/kvm_host.h | 6 ------ + arch/x86/kvm/svm/svm.c | 28 +++++++++++++++++----------- + 2 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 867febee8fc3..4b443624b884 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1931,12 +1931,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) + #endif + } + +-#define put_smstate(type, buf, offset, val) \ +- *(type *)((buf) + (offset) - 0x7e00) = val +- +-#define GET_SMSTATE(type, buf, offset) \ +- (*(type *)((buf) + (offset) - 0x7e00)) +- + int kvm_cpu_dirty_log_size(void); + + int alloc_all_memslots_rmaps(struct kvm *kvm); +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index 21f747eacc9a..283b1ca95317 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -4304,6 +4304,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) + + static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) + { ++ struct kvm_smram_state_64 *smram = (struct kvm_smram_state_64 *)smstate; + struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_host_map map_save; + int ret; +@@ -4311,10 +4312,17 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) + if (!is_guest_mode(vcpu)) + return 0; + +- /* FED8h - SVM Guest */ +- put_smstate(u64, smstate, 0x7ed8, 1); +- /* FEE0h - SVM Guest VMCB Physical Address */ +- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); ++ /* ++ * 32 bit SMRAM format doesn't preserve EFER and SVM state. ++ * SVM should not be enabled by the userspace without marking ++ * the CPU as at least long mode capable. 
++ */ ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) ++ return 1; ++ ++ smram->svm_guest_flag = 1; ++ smram->svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa; + + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; + svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; +@@ -4351,9 +4359,9 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) + + static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) + { ++ struct kvm_smram_state_64 *smram = (struct kvm_smram_state_64 *)smstate; + struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_host_map map, map_save; +- u64 saved_efer, vmcb12_gpa; + struct vmcb *vmcb12; + int ret; + +@@ -4361,18 +4369,16 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) + return 0; + + /* Non-zero if SMI arrived while vCPU was in guest mode. */ +- if (!GET_SMSTATE(u64, smstate, 0x7ed8)) ++ if (!smram->svm_guest_flag) + return 0; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) + return 1; + +- saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); +- if (!(saved_efer & EFER_SVME)) ++ if (!(smram->efer & EFER_SVME)) + return 1; + +- vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); +- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) ++ if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram->svm_guest_vmcb_gpa), &map) == -EINVAL) + return 1; + + ret = 1; +@@ -4397,7 +4403,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) + + vmcb12 = map.hva; + nested_load_control_from_vmcb12(svm, &vmcb12->control); +- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); ++ ret = enter_svm_guest_mode(vcpu, smram->svm_guest_vmcb_gpa, vmcb12, false); + + if (ret) + goto unmap_save; diff --git a/patches/kernel/0024-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch b/patches/kernel/0024-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch new file mode 100644 index 0000000..0c0d2e6 --- /dev/null +++ b/patches/kernel/0024-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch @@
-0,0 +1,167 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 21 Jun 2022 18:09:02 +0300 +Subject: [PATCH] KVM: x86: emulator/smm: preserve interrupt shadow in SMRAM + +When #SMI is asserted, the CPU can be in interrupt shadow +due to sti or mov ss. + +It is not mandatory in Intel/AMD prm to have the #SMI +blocked during the shadow, and on top of +that, since neither SVM nor VMX has true support for SMI +window, waiting for one instruction would mean single stepping +the guest. + +Instead, allow #SMI in this case, but both reset the interrupt +window and stash its value in SMRAM to restore it on exit +from SMM. + +This fixes rare failures seen mostly on windows guests on VMX, +when #SMI falls on the sti instruction which manifest in +VM entry failure due to EFLAGS.IF not being set, but STI interrupt +window still being set in the VMCS. + +Signed-off-by: Maxim Levitsky +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/emulate.c | 17 ++++++++++++++--- + arch/x86/kvm/kvm_emulate.h | 13 ++++++++++--- + arch/x86/kvm/x86.c | 12 ++++++++++++ + 3 files changed, 36 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 98c2cf169b39..5614456de922 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -2416,7 +2416,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, + struct kvm_smram_state_32 *smstate) + { + struct desc_ptr dt; +- int i; ++ int i, r; + + ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED; + ctxt->_eip = smstate->eip; +@@ -2451,8 +2451,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, + + ctxt->ops->set_smbase(ctxt, smstate->smbase); + +- return rsm_enter_protected_mode(ctxt, smstate->cr0, +- smstate->cr3, smstate->cr4); ++ r = rsm_enter_protected_mode(ctxt, smstate->cr0, ++ smstate->cr3, smstate->cr4); ++ ++ if (r != X86EMUL_CONTINUE) ++ return r; ++ ++ ctxt->ops->set_int_shadow(ctxt, 0); ++
ctxt->interruptibility = (u8)smstate->int_shadow; ++ ++ return X86EMUL_CONTINUE; + } + + #ifdef CONFIG_X86_64 +@@ -2501,6 +2509,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, + rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS); + rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS); + ++ ctxt->ops->set_int_shadow(ctxt, 0); ++ ctxt->interruptibility = (u8)smstate->int_shadow; ++ + return X86EMUL_CONTINUE; + } + #endif +diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h +index d16b377be70b..5b881a3a5ed9 100644 +--- a/arch/x86/kvm/kvm_emulate.h ++++ b/arch/x86/kvm/kvm_emulate.h +@@ -229,6 +229,7 @@ struct x86_emulate_ops { + bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt); + + void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); ++ void (*set_int_shadow)(struct x86_emulate_ctxt *ctxt, u8 shadow); + + unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); + void (*exiting_smm)(struct x86_emulate_ctxt *ctxt); +@@ -499,7 +500,9 @@ struct kvm_smram_state_32 { + u32 reserved1[62]; /* FE00 - FEF7 */ + u32 smbase; /* FEF8 */ + u32 smm_revision; /* FEFC */ +- u32 reserved2[5]; /* FF00-FF13 */ ++ u32 reserved2[4]; /* FF00-FF0F*/ ++ /* int_shadow is KVM extension*/ ++ u32 int_shadow; /* FF10 */ + /* CR4 is not present in Intel/AMD SMRAM image*/ + u32 cr4; /* FF14 */ + u32 reserved3[5]; /* FF18 */ +@@ -571,13 +574,17 @@ struct kvm_smram_state_64 { + struct kvm_smm_seg_state_64 idtr; /* FE80 (R/O) */ + struct kvm_smm_seg_state_64 tr; /* FE90 (R/O) */ + +- /* I/O restart and auto halt restart are not implemented by KVM */ ++ /* ++ * I/O restart and auto halt restart are not implemented by KVM ++ * int_shadow is KVM's extension ++ */ ++ + u64 io_restart_rip; /* FEA0 (R/O) */ + u64 io_restart_rcx; /* FEA8 (R/O) */ + u64 io_restart_rsi; /* FEB0 (R/O) */ + u64 io_restart_rdi; /* FEB8 (R/O) */ + u32 io_restart_dword; /* FEC0 (R/O) */ +- u32 reserved1; /* FEC4 */ ++ u32 int_shadow; /* FEC4 (R/O) */ + u8 io_instruction_restart; /* FEC8 
(R/W) */ + u8 auto_halt_restart; /* FEC9 (R/W) */ + u8 reserved2[6]; /* FECA-FECF */ +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index f40cd45b6a01..9afac97ea98c 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -7299,6 +7299,11 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked) + static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked); + } + ++static void emulator_set_int_shadow(struct x86_emulate_ctxt *ctxt, u8 shadow) ++{ ++ static_call(kvm_x86_set_interrupt_shadow)(emul_to_vcpu(ctxt), shadow); ++} ++ + static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) + { + return emul_to_vcpu(ctxt)->arch.hflags; +@@ -7368,6 +7373,7 @@ static const struct x86_emulate_ops emulate_ops = { + .guest_has_fxsr = emulator_guest_has_fxsr, + .guest_has_rdpid = emulator_guest_has_rdpid, + .set_nmi_mask = emulator_set_nmi_mask, ++ .set_int_shadow = emulator_set_int_shadow, + .get_hflags = emulator_get_hflags, + .exiting_smm = emulator_exiting_smm, + .leave_smm = emulator_leave_smm, +@@ -9088,6 +9094,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat + smram->cr4 = kvm_read_cr4(vcpu); + smram->smm_revision = 0x00020000; + smram->smbase = vcpu->arch.smbase; ++ ++ smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); + } + + #ifdef CONFIG_X86_64 +@@ -9136,6 +9144,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_stat + enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS); + enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS); + enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS); ++ ++ smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); + } + #endif + +@@ -9172,6 +9182,8 @@ static void enter_smm(struct kvm_vcpu *vcpu) + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); + kvm_rip_write(vcpu, 0x8000); + ++ static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); ++ + cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | 
X86_CR0_PG); + static_call(kvm_x86_set_cr0)(vcpu, cr0); + vcpu->arch.cr0 = cr0;