restructure patches
rebase on Ubuntu-4.13.0-32.35; the effective kernel tree that gets compiled after all patches have been applied is functionally identical (modulo parts for architectures we don't care about, and Ubuntu build files)
This commit is contained in:
parent 8d1dbe7c68
commit a70918fbbc
@@ -54,10 +54,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
2 files changed, 111 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7441c67d1d8e..73fd6abac39b 100644
index 1738d820c56f..e7216bc05b3b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2918,6 +2918,15 @@
@@ -2930,6 +2930,15 @@
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.

@@ -23,10 +23,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
3 files changed, 25 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 92c9032502d8..b69af3df978a 100644
index 066b51796695..f39bc68efa56 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1437,4 +1437,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
@@ -1546,4 +1546,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#endif
}

@@ -35,7 +35,7 @@ index 92c9032502d8..b69af3df978a 100644
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7351cdc46cc7..703cd4171921 100644
index f896c441fc2c..eae4aecf3cfe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6711,6 +6711,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)

@@ -36,10 +36,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
1 file changed, 106 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 118709e7597d..a2c95522ac99 100644
index cb044cd17790..d2168203bddc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -202,6 +202,10 @@ struct loaded_vmcs {
@@ -203,6 +203,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
@@ -50,7 +50,7 @@ index 118709e7597d..a2c95522ac99 100644
struct list_head loaded_vmcss_on_cpu_link;
};

@@ -1288,6 +1292,11 @@ static inline bool cpu_has_vmx_invpcid(void)
@@ -1289,6 +1293,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}

@@ -62,7 +62,7 @@ index 118709e7597d..a2c95522ac99 100644
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1339,11 +1348,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
@@ -1340,11 +1349,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit);
}

@@ -74,7 +74,7 @@ index 118709e7597d..a2c95522ac99 100644
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control &
@@ -3676,9 +3680,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
@@ -3686,9 +3690,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;

@@ -87,7 +87,7 @@ index 118709e7597d..a2c95522ac99 100644
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -5538,7 +5542,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
@@ -5548,7 +5552,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)

static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
@@ -97,7 +97,7 @@ index 118709e7597d..a2c95522ac99 100644
enable_irq_window(vcpu);
return;
}
@@ -5578,6 +5583,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -5588,6 +5593,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

@@ -117,7 +117,7 @@ index 118709e7597d..a2c95522ac99 100644
++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false;

@@ -5596,6 +5614,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -5606,6 +5624,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked;

@@ -126,7 +126,7 @@ index 118709e7597d..a2c95522ac99 100644
if (vmx->loaded_vmcs->nmi_known_unmasked)
return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5607,13 +5627,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
@@ -5617,13 +5637,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

@@ -154,7 +154,7 @@ index 118709e7597d..a2c95522ac99 100644
}

static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5621,6 +5648,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5631,6 +5658,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;

@@ -165,7 +165,7 @@ index 118709e7597d..a2c95522ac99 100644
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -6348,6 +6379,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
@@ -6359,6 +6390,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
@@ -173,7 +173,7 @@ index 118709e7597d..a2c95522ac99 100644
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);

@@ -6820,7 +6852,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
@@ -6833,7 +6865,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
}

/* Create a new VMCS */
@@ -182,7 +182,7 @@ index 118709e7597d..a2c95522ac99 100644
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
@@ -7837,6 +7869,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
@@ -7850,6 +7882,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
@@ -190,7 +190,7 @@ index 118709e7597d..a2c95522ac99 100644
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8554,6 +8587,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
@@ -8567,6 +8600,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}

@@ -216,7 +216,7 @@ index 118709e7597d..a2c95522ac99 100644
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -8837,33 +8889,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
@@ -8850,33 +8902,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)

idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;

@@ -282,7 +282,7 @@ index 118709e7597d..a2c95522ac99 100644
}

static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -8980,6 +9037,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@ -8993,6 +9050,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4;

@@ -34,10 +34,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af256b786a70..af09baa3d736 100644
index 55fb408465f7..e99bdfcc6b01 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3626,6 +3626,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
@@ -3649,6 +3649,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {

@@ -1,83 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:20 -0700
Subject: [PATCH] x86/mm: Add the 'nopcid' boot option to turn off PCID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The parameter is only present on x86_64 systems to save a few bytes,
as PCID is always disabled on x86_32.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 0790c9aad84901ca1bdc14746175549c8b5da215)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 62d3a63645c17611fe8ccc0c5adc5e840d9cff7b)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
Documentation/admin-guide/kernel-parameters.txt | 2 ++
arch/x86/kernel/cpu/common.c | 18 ++++++++++++++++++
2 files changed, 20 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 73fd6abac39b..3510e255ef4c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2700,6 +2700,8 @@
nopat [X86] Disable PAT (page attribute table extension of
pagetables) support.

+ nopcid [X86-64] Disable the PCID cpu feature.
+
norandmaps Don't use address space randomization. Equivalent to
echo 0 > /proc/sys/kernel/randomize_va_space

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c8b39870f33e..904485e7b230 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -168,6 +168,24 @@ static int __init x86_mpx_setup(char *s)
}
__setup("nompx", x86_mpx_setup);

+#ifdef CONFIG_X86_64
+static int __init x86_pcid_setup(char *s)
+{
+ /* require an exact match without trailing characters */
+ if (strlen(s))
+ return 0;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_PCID))
+ return 1;
+
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ pr_info("nopcid: PCID feature disabled\n");
+ return 1;
+}
+__setup("nopcid", x86_pcid_setup);
+#endif
+
static int __init x86_noinvpcid_setup(char *s)
{
/* noinvpcid doesn't accept parameters */
--
2.14.2

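A quick way to verify the effect of this option (not part of the series, and assuming the usual /proc/cpuinfo layout): after booting with "nopcid" on the kernel command line, the "pcid" flag should no longer be advertised. A minimal userspace check:

#include <stdio.h>
#include <string.h>

/* Hypothetical sanity check, not from the patch: scan the "flags"
 * lines of /proc/cpuinfo for a standalone "pcid" entry. */
int main(void)
{
	FILE *f = fopen("/proc/cpuinfo", "r");
	char line[4096];

	if (!f)
		return 2;
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "flags", 5) && strstr(line, " pcid ")) {
			puts("pcid still advertised");
			fclose(f);
			return 1;
		}
	}
	fclose(f);
	puts("pcid not advertised (disabled or unsupported)");
	return 0;
}
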
@@ -1,120 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:21 -0700
Subject: [PATCH] x86/mm: Enable CR4.PCIDE on supported systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

We can use PCID if the CPU has PCID and PGE and we're not on Xen.

By itself, this has no effect. A followup patch will start using PCID.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 7d6bbe5528395f18de50bd2532843546c849883d)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/tlbflush.h | 8 ++++++++
arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++
arch/x86/xen/enlighten_pv.c | 6 ++++++
3 files changed, 36 insertions(+)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50ea3482e1d1..2b3d68093235 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -207,6 +207,14 @@ static inline void __flush_tlb_all(void)
__flush_tlb_global();
else
__flush_tlb();
+
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
+ * we might fail to flush kernel translations for other cached ASIDs.
+ *
+ * To avoid this issue, we force PCID off if PGE is off.
+ */
}

static inline void __flush_tlb_one(unsigned long addr)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 904485e7b230..b95cd94ca97b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -329,6 +329,25 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}

+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -1143,6 +1162,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smep(c);
setup_smap(c);

+ /* Set up PCID */
+ setup_pcid(c);
+
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4ddb3f37..290bc5ac9852 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -264,6 +264,12 @@ static void __init xen_init_capabilities(void)
setup_clear_cpu_cap(X86_FEATURE_ACC);
setup_clear_cpu_cap(X86_FEATURE_X2APIC);

+ /*
+ * Xen PV would need some work to support PCID: CR3 handling as well
+ * as xen_flush_tlb_others() would need updating.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+
if (!xen_initial_domain())
setup_clear_cpu_cap(X86_FEATURE_ACPI);

--
2.14.2

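Taken together with the Xen hunk, the enablement rule in setup_pcid() above reduces to a single predicate. A standalone restatement (illustrative only; the helper name is invented here):

#include <assert.h>
#include <stdbool.h>

/* Sketch of the policy from the patch above: keep PCID only when PGE
 * is also present (so __flush_tlb_all() stays correct) and we are not
 * running as a Xen PV guest. */
static bool may_enable_pcide(bool has_pcid, bool has_pge, bool xen_pv)
{
	return has_pcid && has_pge && !xen_pv;
}

int main(void)
{
	assert(may_enable_pcide(true, true, false));   /* common bare metal */
	assert(!may_enable_pcide(true, false, false)); /* odd VM: PCID w/o PGE */
	assert(!may_enable_pcide(true, true, true));   /* Xen PV */
	return 0;
}
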
@@ -1,54 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 6 Sep 2017 19:54:54 -0700
Subject: [PATCH] x86/mm: Document how CR4.PCIDE restore works
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

While debugging a problem, I thought that using
cr4_set_bits_and_update_boot() to restore CR4.PCIDE would be
helpful. It turns out to be counterproductive.

Add a comment documenting how this works.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 1c9fe4409ce3e9c78b1ed96ee8ed699d4f03bf33)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0d69e4c4a2db42a9bac6609a3df15bd91163f8b9)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/common.c | 13 +++++++++++++
1 file changed, 13 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b95cd94ca97b..0b80ed14ff52 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -333,6 +333,19 @@ static void setup_pcid(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PCID)) {
if (cpu_has(c, X86_FEATURE_PGE)) {
+ /*
+ * We'd like to use cr4_set_bits_and_update_boot(),
+ * but we can't. CR4.PCIDE is special and can only
+ * be set in long mode, and the early CPU init code
+ * doesn't know this and would try to restore CR4.PCIDE
+ * prior to entering long mode.
+ *
+ * Instead, we rely on the fact that hotplug, resume,
+ * etc all fully restore CR4 before they write anything
+ * that could have nonzero PCID bits to CR3. CR4.PCIDE
+ * has no effect on the page tables themselves, so we
+ * don't need it to be restored early.
+ */
cr4_set_bits(X86_CR4_PCIDE);
} else {
/*
--
2.14.2

@@ -0,0 +1,53 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Mohamed Ghannam <simo.ghannam@gmail.com>
Date: Fri, 8 Dec 2017 15:39:50 +0100
Subject: [PATCH] dccp: CVE-2017-8824: use-after-free in DCCP code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Whenever the sock object is in DCCP_CLOSED state,
dccp_disconnect() must free dccps_hc_tx_ccid and
dccps_hc_rx_ccid and set to NULL.

Signed-off-by: Mohamed Ghannam <simo.ghannam@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

CVE-2017-8824
(cherry picked from commit 69c64866ce072dea1d1e59a0d61e0f66c0dffb76 linux-next)
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Acked-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
net/dccp/proto.c | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168fcc06a..9d43c1f40274 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
int err = 0;
const int old_state = sk->sk_state;

@@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags)
sk->sk_err = ECONNRESET;

dccp_clear_xmit_timers(sk);
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_rx_ccid = NULL;
+ dp->dccps_hc_tx_ccid = NULL;

__skb_queue_purge(&sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_write_queue);
--
2.14.2

@@ -1,201 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 11 Jul 2017 10:33:38 -0500
Subject: [PATCH] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.

The new macros won't work correctly if they're invoked with IRQs on.
Add a check under CONFIG_DEBUG_ENTRY to detect that.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
[ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool
and ORC unwinder's lives a little easier. ]
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 1d3e53e8624a3ec85f4041ca6d973da7c1575938)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit be58b042e135d0ee777a54798f33015857d7e2e0)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/process_64.c | 3 ++
arch/x86/Kconfig.debug | 2 --
arch/x86/entry/entry_64.S | 85 +++++++++++++++++++++++++++++++-------------
3 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index fe56e6f93cbb..1e7701c4cd80 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -404,6 +404,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);

+ WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+ this_cpu_read(irq_count) != -1);
+
switch_fpu_prepare(prev_fpu, cpu);

/* We must save %fs and %gs before load_TLS() because
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index cd20ca0b4043..1fc519f3c49e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
Some of these sanity checks may slow down kernel entries and
exits or otherwise impact performance.

- This is currently used to help test NMI code.
-
If unsure, say N.

config DEBUG_NMI_SELFTEST
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6d078b89a5e8..07b4056af8a8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -447,6 +447,59 @@ ENTRY(irq_entries_start)
.endr
END(irq_entries_start)

+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+ pushfq
+ testl $X86_EFLAGS_IF, (%rsp)
+ jz .Lokay_\@
+ ud2
+.Lokay_\@:
+ addq $8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
+ movq %rsp, \old_rsp
+ incl PER_CPU_VAR(irq_count)
+
+ /*
+ * Right now, if we just incremented irq_count to zero, we've
+ * claimed the IRQ stack but we haven't switched to it yet.
+ *
+ * If anything is added that can interrupt us here without using IST,
+ * it must be *extremely* careful to limit its stack usage. This
+ * could include kprobes and a hypothetical future IST-less #DB
+ * handler.
+ */
+
+ cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
+ pushq \old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
+ /* We need to be off the IRQ stack before decrementing irq_count. */
+ popq %rsp
+
+ /*
+ * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+ * the irq stack but we're not on it.
+ */
+
+ decl PER_CPU_VAR(irq_count)
+.endm
+
/*
* Interrupt entry/exit.
*
@@ -485,17 +538,7 @@ END(irq_entries_start)
CALL_enter_from_user_mode

1:
- /*
- * Save previous stack pointer, optionally switch to interrupt stack.
- * irq_count is used to check if a CPU is already on an interrupt stack
- * or not. While this is essentially redundant with preempt_count it is
- * a little cheaper to use a separate counter in the PDA (short of
- * moving irq_enter into assembly, which would be too much work)
- */
- movq %rsp, %rdi
- incl PER_CPU_VAR(irq_count)
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
- pushq %rdi
+ ENTER_IRQ_STACK old_rsp=%rdi
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF

@@ -515,10 +558,8 @@ common_interrupt:
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- decl PER_CPU_VAR(irq_count)

- /* Restore saved previous stack */
- popq %rsp
+ LEAVE_IRQ_STACK

testb $3, CS(%rsp)
jz retint_kernel
@@ -892,12 +933,10 @@ bad_gs:
ENTRY(do_softirq_own_stack)
pushq %rbp
mov %rsp, %rbp
- incl PER_CPU_VAR(irq_count)
- cmove PER_CPU_VAR(irq_stack_ptr), %rsp
- push %rbp /* frame pointer backlink */
+ ENTER_IRQ_STACK old_rsp=%r11
call __do_softirq
+ LEAVE_IRQ_STACK
leaveq
- decl PER_CPU_VAR(irq_count)
ret
END(do_softirq_own_stack)

@@ -924,13 +963,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
* see the correct pointer to the pt_regs
*/
movq %rdi, %rsp /* we don't return, adjust the stack frame */
-11: incl PER_CPU_VAR(irq_count)
- movq %rsp, %rbp
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
- pushq %rbp /* frame pointer backlink */
+
+ ENTER_IRQ_STACK old_rsp=%r10
call xen_evtchn_do_upcall
- popq %rsp
- decl PER_CPU_VAR(irq_count)
+ LEAVE_IRQ_STACK
+
#ifndef CONFIG_PREEMPT
call xen_maybe_preempt_hcall
#endif
--
2.14.2

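The irq_count bookkeeping these macros rely on is easy to model outside the kernel. A user-space analogy (not kernel code; the real stack switch is the cmovzq above, which only fires when the increment lands on zero):

#include <assert.h>

static int irq_count = -1;   /* models PER_CPU_VAR(irq_count) */
static int on_irq_stack;     /* models %rsp pointing into the IRQ stack */

static void enter_irq_stack(void)
{
	/* Only the transition from -1 to 0 claims the stack and switches;
	 * a nested entry (an NMI during IRQ stack setup) sees a positive
	 * count and keeps the stack it is already on. */
	if (++irq_count == 0)
		on_irq_stack = 1;
}

static void leave_irq_stack(void)
{
	/* Undo in reverse: only the outermost exit (count back to -1)
	 * actually leaves the IRQ stack. */
	if (irq_count-- == 0)
		on_irq_stack = 0;
}

int main(void)
{
	enter_irq_stack();            /* outer interrupt */
	enter_irq_stack();            /* NMI nesting during setup/teardown */
	leave_irq_stack();
	assert(on_irq_stack);         /* still claimed by the outer IRQ */
	leave_irq_stack();
	assert(!on_irq_stack && irq_count == -1);
	return 0;
}
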
@@ -0,0 +1,73 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 7 Dec 2017 16:07:00 +0100
Subject: [PATCH] sctp: do not peel off an assoc from one netns to another one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now when peeling off an association to the sock in another netns, all
transports in this assoc are not to be rehashed and keep use the old
key in hashtable.

As a transport uses sk->net as the hash key to insert into hashtable,
it would miss removing these transports from hashtable due to the new
netns when closing the sock and all transports are being freeed, then
later an use-after-free issue could be caused when looking up an asoc
and dereferencing those transports.

This is a very old issue since very beginning, ChunYu found it with
syzkaller fuzz testing with this series:

socket$inet6_sctp()
bind$inet6()
sendto$inet6()
unshare(0x40000000)
getsockopt$inet_sctp6_SCTP_GET_ASSOC_ID_LIST()
getsockopt$inet_sctp6_SCTP_SOCKOPT_PEELOFF()

This patch is to block this call when peeling one assoc off from one
netns to another one, so that the netns of all transport would not
go out-sync with the key in hashtable.

Note that this patch didn't fix it by rehashing transports, as it's
difficult to handle the situation when the tuple is already in use
in the new netns. Besides, no one would like to peel off one assoc
to another netns, considering ipaddrs, ifaces, etc. are usually
different.

Reported-by: ChunYu Wang <chunwang@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

CVE-2017-15115
(cherry picked from commit df80cd9b28b9ebaa284a41df611dbf3a2d05ca74)
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Acked-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
net/sctp/socket.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 8d760863bc41..52f388e0448e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4894,6 +4894,10 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
struct socket *sock;
int err = 0;

+ /* Do not peel off from one netns to another one. */
+ if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
+ return -EINVAL;
+
if (!asoc)
return -EINVAL;

--
2.14.2

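The syzkaller sequence quoted in the commit message translates roughly to the following userspace sketch (illustrative only: error handling and association setup are elided, sctp_peeloff() from lksctp-tools is assumed, and the association id is a placeholder; on a kernel with this patch the peeloff is expected to fail with EINVAL):

#define _GNU_SOURCE
#include <sched.h>            /* unshare(), CLONE_NEWNET (0x40000000 above) */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>     /* sctp_peeloff(); link with -lsctp */

int main(void)
{
	int fd = socket(AF_INET6, SOCK_SEQPACKET, IPPROTO_SCTP);
	struct sockaddr_in6 addr = {
		.sin6_family = AF_INET6,
		.sin6_addr   = IN6ADDR_LOOPBACK_INIT,
	};

	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	/* ... establish an association and obtain its id (elided) ... */

	unshare(CLONE_NEWNET);    /* current netns changes under the socket */

	/* sock_net(sk) no longer matches current->nsproxy->net_ns, so the
	 * check added above rejects the peeloff; assoc id 0 is a stand-in
	 * for the real id returned by SCTP_GET_ASSOC_ID_LIST. */
	int peeled = sctp_peeloff(fd, 0);

	printf("peeloff result: %d\n", peeled);
	return 0;
}
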
@@ -1,94 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 11 Jul 2017 10:33:39 -0500
Subject: [PATCH] x86/entry/64: Initialize the top of the IRQ stack before
switching stacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The OOPS unwinder wants the word at the top of the IRQ stack to
point back to the previous stack at all times when the IRQ stack
is in use. There's currently a one-instruction window in ENTER_IRQ_STACK
during which this isn't the case. Fix it by writing the old RSP to the
top of the IRQ stack before jumping.

This currently writes the pointer to the stack twice, which is a bit
ugly. We could get rid of this by replacing irq_stack_ptr with
irq_stack_ptr_minus_eight (better name welcome). OTOH, there may be
all kinds of odd microarchitectural considerations in play that
affect performance by a few cycles here.

Reported-by: Mike Galbraith <efault@gmx.de>
Reported-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/aae7e79e49914808440ad5310ace138ced2179ca.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 2995590964da93e1fd9a91550f9c9d9fab28f160)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a753ff654dfd07a7f8d6f39a27126589eac7e55f)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 07b4056af8a8..184b70712545 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -469,6 +469,7 @@ END(irq_entries_start)
DEBUG_ENTRY_ASSERT_IRQS_OFF
movq %rsp, \old_rsp
incl PER_CPU_VAR(irq_count)
+ jnz .Lirq_stack_push_old_rsp_\@

/*
* Right now, if we just incremented irq_count to zero, we've
@@ -478,9 +479,30 @@ END(irq_entries_start)
* it must be *extremely* careful to limit its stack usage. This
* could include kprobes and a hypothetical future IST-less #DB
* handler.
+ *
+ * The OOPS unwinder relies on the word at the top of the IRQ
+ * stack linking back to the previous RSP for the entire time we're
+ * on the IRQ stack. For this to work reliably, we need to write
+ * it before we actually move ourselves to the IRQ stack.
+ */
+
+ movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
+ movq PER_CPU_VAR(irq_stack_ptr), %rsp
+
+#ifdef CONFIG_DEBUG_ENTRY
+ /*
+ * If the first movq above becomes wrong due to IRQ stack layout
+ * changes, the only way we'll notice is if we try to unwind right
+ * here. Assert that we set up the stack right to catch this type
+ * of bug quickly.
*/
+ cmpq -8(%rsp), \old_rsp
+ je .Lirq_stack_okay\@
+ ud2
+ .Lirq_stack_okay\@:
+#endif

- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
+.Lirq_stack_push_old_rsp_\@:
pushq \old_rsp
.endm

--
2.14.2

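The ordering constraint described above can be modeled in a few lines: the backlink must be written into the top word of the IRQ stack before the stack pointer moves, so an unwinder interrupting between the two steps still finds a valid link. A user-space sketch (an analogy, not the kernel code):

#include <assert.h>
#include <stdint.h>

#define IRQ_STACK_WORDS 512

static uintptr_t irq_stack[IRQ_STACK_WORDS];
static uintptr_t *rsp;                          /* models %rsp */

static void enter_irq_stack(uintptr_t *old_rsp)
{
	/* Step 1: store the backlink at the top of the IRQ stack. */
	irq_stack[IRQ_STACK_WORDS - 1] = (uintptr_t)old_rsp;
	/* Step 2: only now switch the stack pointer; the top word already
	 * links back, so there is no window with a dangling unwind chain. */
	rsp = &irq_stack[IRQ_STACK_WORDS - 1];
}

int main(void)
{
	uintptr_t prev_frame;

	enter_irq_stack(&prev_frame);
	assert(*rsp == (uintptr_t)&prev_frame);  /* top word links back */
	return 0;
}
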
@@ -1,463 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 11 Jul 2017 10:33:44 -0500
Subject: [PATCH] x86/entry/64: Add unwind hint annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add unwind hint annotations to entry_64.S. This will enable the ORC
unwinder to unwind through any location in the entry code including
syscalls, interrupts, and exceptions.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/b9f6d478aadf68ba57c739dcfac34ec0dc021c4c.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 8c1f75587a18ca032da8f6376d1ed882d7095289)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a8448e6971c1e71b22c651131d14f8be76e6d399)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/Makefile | 1 -
arch/x86/entry/calling.h | 5 ++++
arch/x86/entry/entry_64.S | 71 ++++++++++++++++++++++++++++++++++++++++-------
3 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 9976fcecd17e..af28a8a24366 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -2,7 +2,6 @@
# Makefile for the x86 low level entry code
#

-OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y

CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 05ed3d393da7..640aafebdc00 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,4 +1,5 @@
#include <linux/jump_label.h>
+#include <asm/unwind_hints.h>

/*

@@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with
movq %rdx, 12*8+\offset(%rsp)
movq %rsi, 13*8+\offset(%rsp)
movq %rdi, 14*8+\offset(%rsp)
+ UNWIND_HINT_REGS offset=\offset extra=0
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
@@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with
movq %r12, 3*8+\offset(%rsp)
movq %rbp, 4*8+\offset(%rsp)
movq %rbx, 5*8+\offset(%rsp)
+ UNWIND_HINT_REGS offset=\offset
.endm

.macro RESTORE_EXTRA_REGS offset=0
@@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with
movq 3*8+\offset(%rsp), %r12
movq 4*8+\offset(%rsp), %rbp
movq 5*8+\offset(%rsp), %rbx
+ UNWIND_HINT_REGS offset=\offset extra=0
.endm

.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
@@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with
.endif
movq 13*8(%rsp), %rsi
movq 14*8(%rsp), %rdi
+ UNWIND_HINT_IRET_REGS offset=16*8
.endm
.macro RESTORE_C_REGS
RESTORE_C_REGS_HELPER 1,1,1,1,1
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 184b70712545..64b233ab7cad 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/export.h>
+#include <asm/frame.h>
#include <linux/err.h>

.code64
@@ -43,9 +44,10 @@

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
+ UNWIND_HINT_EMPTY
swapgs
sysretq
-ENDPROC(native_usergs_sysret64)
+END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */

.macro TRACE_IRQS_IRETQ
@@ -134,6 +136,7 @@ ENDPROC(native_usergs_sysret64)
*/

ENTRY(entry_SYSCALL_64)
+ UNWIND_HINT_EMPTY
/*
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
@@ -169,6 +172,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ UNWIND_HINT_REGS extra=0

/*
* If we need to do entry work or if we guess we'll need to do
@@ -223,6 +227,7 @@ entry_SYSCALL_64_fastpath:
movq EFLAGS(%rsp), %r11
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
+ UNWIND_HINT_EMPTY
USERGS_SYSRET64

1:
@@ -316,6 +321,7 @@ syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
+ UNWIND_HINT_EMPTY
USERGS_SYSRET64

opportunistic_sysret_failed:
@@ -343,6 +349,7 @@ ENTRY(stub_ptregs_64)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
popq %rax
+ UNWIND_HINT_REGS extra=0
jmp entry_SYSCALL64_slow_path

1:
@@ -351,6 +358,7 @@ END(stub_ptregs_64)

.macro ptregs_stub func
ENTRY(ptregs_\func)
+ UNWIND_HINT_FUNC
leaq \func(%rip), %rax
jmp stub_ptregs_64
END(ptregs_\func)
@@ -367,6 +375,7 @@ END(ptregs_\func)
* %rsi: next task
*/
ENTRY(__switch_to_asm)
+ UNWIND_HINT_FUNC
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -406,6 +415,7 @@ END(__switch_to_asm)
* r12: kernel thread arg
*/
ENTRY(ret_from_fork)
+ UNWIND_HINT_EMPTY
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */

@@ -413,6 +423,7 @@ ENTRY(ret_from_fork)
jnz 1f /* kernel threads are uncommon */

2:
+ UNWIND_HINT_REGS
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
@@ -440,10 +451,11 @@ END(ret_from_fork)
ENTRY(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+ UNWIND_HINT_IRET_REGS
pushq $(~vector+0x80) /* Note: always in signed byte range */
- vector=vector+1
jmp common_interrupt
.align 8
+ vector=vector+1
.endr
END(irq_entries_start)

@@ -465,9 +477,14 @@ END(irq_entries_start)
*
* The invariant is that, if irq_count != -1, then the IRQ stack is in use.
*/
-.macro ENTER_IRQ_STACK old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp
DEBUG_ENTRY_ASSERT_IRQS_OFF
movq %rsp, \old_rsp
+
+ .if \regs
+ UNWIND_HINT_REGS base=\old_rsp
+ .endif
+
incl PER_CPU_VAR(irq_count)
jnz .Lirq_stack_push_old_rsp_\@

@@ -504,16 +521,24 @@ END(irq_entries_start)

.Lirq_stack_push_old_rsp_\@:
pushq \old_rsp
+
+ .if \regs
+ UNWIND_HINT_REGS indirect=1
+ .endif
.endm

/*
* Undoes ENTER_IRQ_STACK.
*/
-.macro LEAVE_IRQ_STACK
+.macro LEAVE_IRQ_STACK regs=1
DEBUG_ENTRY_ASSERT_IRQS_OFF
/* We need to be off the IRQ stack before decrementing irq_count. */
popq %rsp

+ .if \regs
+ UNWIND_HINT_REGS
+ .endif
+
/*
* As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
* the irq stack but we're not on it.
@@ -624,6 +649,7 @@ restore_c_regs_and_iret:
INTERRUPT_RETURN

ENTRY(native_iret)
+ UNWIND_HINT_IRET_REGS
/*
* Are we returning to a stack segment from the LDT? Note: in
* 64-bit mode SS:RSP on the exception stack is always valid.
@@ -696,6 +722,7 @@ native_irq_return_ldt:
orq PER_CPU_VAR(espfix_stack), %rax
SWAPGS
movq %rax, %rsp
+ UNWIND_HINT_IRET_REGS offset=8

/*
* At this point, we cannot write to the stack any more, but we can
@@ -717,6 +744,7 @@ END(common_interrupt)
*/
.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
+ UNWIND_HINT_IRET_REGS
ASM_CLAC
pushq $~(\num)
.Lcommon_\sym:
@@ -803,6 +831,8 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt

.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
+ UNWIND_HINT_IRET_REGS offset=8
+
/* Sanity check */
.if \shift_ist != -1 && \paranoid == 0
.error "using shift_ist requires paranoid=1"
@@ -826,6 +856,7 @@ ENTRY(\sym)
.else
call error_entry
.endif
+ UNWIND_HINT_REGS
/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */

.if \paranoid
@@ -923,6 +954,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
* edi: new selector
*/
ENTRY(native_load_gs_index)
+ FRAME_BEGIN
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
SWAPGS
@@ -931,8 +963,9 @@ ENTRY(native_load_gs_index)
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
popfq
+ FRAME_END
ret
-END(native_load_gs_index)
+ENDPROC(native_load_gs_index)
EXPORT_SYMBOL(native_load_gs_index)

_ASM_EXTABLE(.Lgs_change, bad_gs)
@@ -955,12 +988,12 @@ bad_gs:
ENTRY(do_softirq_own_stack)
pushq %rbp
mov %rsp, %rbp
- ENTER_IRQ_STACK old_rsp=%r11
+ ENTER_IRQ_STACK regs=0 old_rsp=%r11
call __do_softirq
- LEAVE_IRQ_STACK
+ LEAVE_IRQ_STACK regs=0
leaveq
ret
-END(do_softirq_own_stack)
+ENDPROC(do_softirq_own_stack)

#ifdef CONFIG_XEN
idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
@@ -984,7 +1017,9 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
* see the correct pointer to the pt_regs
*/
+ UNWIND_HINT_FUNC
movq %rdi, %rsp /* we don't return, adjust the stack frame */
+ UNWIND_HINT_REGS

ENTER_IRQ_STACK old_rsp=%r10
call xen_evtchn_do_upcall
@@ -1010,6 +1045,7 @@ END(xen_do_hypervisor_callback)
* with its current contents: any discrepancy means we in category 1.
*/
ENTRY(xen_failsafe_callback)
+ UNWIND_HINT_EMPTY
movl %ds, %ecx
cmpw %cx, 0x10(%rsp)
jne 1f
@@ -1029,11 +1065,13 @@ ENTRY(xen_failsafe_callback)
pushq $0 /* RIP */
pushq %r11
pushq %rcx
+ UNWIND_HINT_IRET_REGS offset=8
jmp general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp), %rcx
movq 8(%rsp), %r11
addq $0x30, %rsp
+ UNWIND_HINT_IRET_REGS
pushq $-1 /* orig_ax = -1 => not a system call */
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
@@ -1079,6 +1117,7 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(paranoid_entry)
+ UNWIND_HINT_FUNC
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
@@ -1106,6 +1145,7 @@ END(paranoid_entry)
* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
*/
ENTRY(paranoid_exit)
+ UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
testl %ebx, %ebx /* swapgs needed? */
@@ -1127,6 +1167,7 @@ END(paranoid_exit)
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
+ UNWIND_HINT_FUNC
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
@@ -1211,6 +1252,7 @@ END(error_entry)
* 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
*/
ENTRY(error_exit)
+ UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
testl %ebx, %ebx
@@ -1220,6 +1262,7 @@ END(error_exit)

/* Runs on exception stack */
ENTRY(nmi)
+ UNWIND_HINT_IRET_REGS
/*
* Fix up the exception frame if we're on Xen.
* PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
@@ -1293,11 +1336,13 @@ ENTRY(nmi)
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
pushq 3*8(%rdx) /* pt_regs->flags */
pushq 2*8(%rdx) /* pt_regs->cs */
pushq 1*8(%rdx) /* pt_regs->rip */
+ UNWIND_HINT_IRET_REGS
pushq $-1 /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -1314,6 +1359,7 @@ ENTRY(nmi)
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+ UNWIND_HINT_REGS
ENCODE_FRAME_POINTER

/*
@@ -1468,6 +1514,7 @@ first_nmi:
.rept 5
pushq 11*8(%rsp)
.endr
+ UNWIND_HINT_IRET_REGS

/* Everything up to here is safe from nested NMIs */

@@ -1483,6 +1530,7 @@ first_nmi:
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
INTERRUPT_RETURN /* continues at repeat_nmi below */
+ UNWIND_HINT_IRET_REGS
1:
#endif

@@ -1532,6 +1580,7 @@ end_repeat_nmi:
* exceptions might do.
*/
call paranoid_entry
+ UNWIND_HINT_REGS

/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp, %rdi
@@ -1569,17 +1618,19 @@ nmi_restore:
END(nmi)

ENTRY(ignore_sysret)
+ UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)

ENTRY(rewind_stack_do_exit)
+ UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp

movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
- leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
+ leaq -PTREGS_SIZE(%rax), %rsp
+ UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE

call do_exit
-1: jmp 1b
END(rewind_stack_do_exit)
--
2.14.2

@@ -1,70 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 Jul 2017 16:10:29 -0500
Subject: [PATCH] xen/x86: Remove SME feature in PV guests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Xen does not currently support SME for PV guests. Clear the SME CPU
capability in order to avoid any ambiguity.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: <xen-devel@lists.xen.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/3b605622a9fae5e588e5a13967120a18ec18071b.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit f2f931c6819467af5260a21c59fb787ce2863f92)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8370907399392a637a2e51b4db3368fb594db3a6)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/xen/enlighten_pv.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 290bc5ac9852..df1921751aa5 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -263,6 +263,7 @@ static void __init xen_init_capabilities(void)
setup_clear_cpu_cap(X86_FEATURE_MTRR);
setup_clear_cpu_cap(X86_FEATURE_ACC);
setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+ setup_clear_cpu_cap(X86_FEATURE_SME);

/*
* Xen PV would need some work to support PCID: CR3 handling as well
--
2.14.2

@ -1,152 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 7 Aug 2017 20:59:21 -0700
Subject: [PATCH] x86/xen/64: Rearrange the SYSCALL entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Xen's raw SYSCALL entries are much less weird than native. Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.

This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
the SWAPGS_UNSAFE_STACK paravirt hook. The SYSENTER code would
benefit from similar treatment.

This makes one change to the native code path: the compat
instruction that clears the high 32 bits of %rax is moved slightly
later. I'd be surprised if this affects performance at all.

Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 8a9949bc71a71b3dd633255ebe8f8869b1f73474)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b8cec41ee5f30df5032cfe8c86103f7d92a89590)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 9 ++-------
arch/x86/entry/entry_64_compat.S | 7 +++----
arch/x86/xen/xen-asm_64.S | 23 +++++++++--------------
3 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 64b233ab7cad..4dbb336a1fdd 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
- SWAPGS_UNSAFE_STACK
- /*
- * A hypervisor implementation might want to use a label
- * after the swapgs, so that it can do the swapgs
- * for the guest and jump here on syscall.
- */
-GLOBAL(entry_SYSCALL_64_after_swapgs)

+ swapgs
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

@@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..5314d7b8e5ad 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
*/
ENTRY(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
+ swapgs

/* Stash user ESP and switch to the kernel stack. */
movl %esp, %r8d
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

- /* Zero-extending 32-bit regs, do not remove */
- movl %eax, %eax
-
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
pushq %r8 /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER32_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+ movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..a8a4f4c460a6 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
* rip
* r11
* rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
*/

-.macro undo_xen_syscall
- mov 0*8(%rsp), %rcx
- mov 1*8(%rsp), %r11
- mov 5*8(%rsp), %rsp
-.endm
-
/* Normal 64-bit system call target */
ENTRY(xen_syscall_target)
- undo_xen_syscall
- jmp entry_SYSCALL_64_after_swapgs
+ popq %rcx
+ popq %r11
+ jmp entry_SYSCALL_64_after_hwframe
ENDPROC(xen_syscall_target)

#ifdef CONFIG_IA32_EMULATION

/* 32-bit compat syscall target */
ENTRY(xen_syscall32_target)
- undo_xen_syscall
- jmp entry_SYSCALL_compat
+ popq %rcx
+ popq %r11
+ jmp entry_SYSCALL_compat_after_hwframe
ENDPROC(xen_syscall32_target)

/* 32-bit compat sysenter target */
ENTRY(xen_sysenter_target)
- undo_xen_syscall
+ mov 0*8(%rsp), %rcx
+ mov 1*8(%rsp), %r11
+ mov 5*8(%rsp), %rsp
jmp entry_SYSENTER_compat
ENDPROC(xen_sysenter_target)

--
2.14.2
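
For orientation, the frame that entry_SYSCALL_64_after_hwframe expects on the
stack is exactly what the CPU pushes for a trap from userspace. A minimal C
sketch of that layout, top of stack first (field names chosen to mirror the
tail of struct pt_regs; illustrative only, not kernel code):

	struct hw_syscall_frame {
		unsigned long ip;	/* user %rcx at SYSCALL time, 0*8(%rsp) */
		unsigned long cs;	/* 1*8(%rsp) */
		unsigned long flags;	/* user %r11 at SYSCALL time, 2*8(%rsp) */
		unsigned long sp;	/* 3*8(%rsp) */
		unsigned long ss;	/* 4*8(%rsp) */
	};

Xen's syscall callback delivers this same five-word frame with %rcx and %r11
stacked on top, which is why the Xen entries above can simply pop those two
registers and jump to the shared label.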

@ -1,223 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 3 Aug 2017 11:38:21 +0900
Subject: [PATCH] irq: Make the irqentry text section unconditional
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Generate irqentry and softirqentry text sections without
any Kconfig dependencies. This will add extra sections, but
there should be no performance impact.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David S . Miller <davem@davemloft.net>
Cc: Francis Deslauriers <francis.deslauriers@efficios.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: linux-arch@vger.kernel.org
Cc: linux-cris-kernel@axis.com
Cc: mathieu.desnoyers@efficios.com
Link: http://lkml.kernel.org/r/150172789110.27216.3955739126693102122.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 229a71860547ec856b156179a9c6bef2de426f66)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8fd2f68cc93ae772cfddf4151d13448ff17d0229)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/arm/include/asm/traps.h | 7 -------
arch/arm64/include/asm/traps.h | 7 -------
include/asm-generic/sections.h | 4 ++++
include/asm-generic/vmlinux.lds.h | 8 --------
include/linux/interrupt.h | 14 +-------------
arch/x86/kernel/unwind_frame.c | 2 --
arch/x86/entry/entry_64.S | 9 ++-------
7 files changed, 7 insertions(+), 44 deletions(-)

diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f555bb3664dc..683d9230984a 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -18,7 +18,6 @@ struct undef_hook {
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);

-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
extern char __irqentry_text_start[];
@@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif

static inline int in_exception_text(unsigned long ptr)
{
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 02e9035b0685..47a9066f7c86 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook);

void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);

-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif

static inline int in_exception_text(unsigned long ptr)
{
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 532372c6cf15..e5da44eddd2f 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -27,6 +27,8 @@
* __kprobes_text_start, __kprobes_text_end
* __entry_text_start, __entry_text_end
* __ctors_start, __ctors_end
+ * __irqentry_text_start, __irqentry_text_end
+ * __softirqentry_text_start, __softirqentry_text_end
*/
extern char _text[], _stext[], _etext[];
extern char _data[], _sdata[], _edata[];
@@ -39,6 +41,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __entry_text_start[], __entry_text_end[];
extern char __start_rodata[], __end_rodata[];
+extern char __irqentry_text_start[], __irqentry_text_end[];
+extern char __softirqentry_text_start[], __softirqentry_text_end[];

/* Start and end of .ctors section - used for constructor calls. */
extern char __ctors_start[], __ctors_end[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 9623d78f8494..e7e955d4ab9e 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -497,25 +497,17 @@
*(.entry.text) \
VMLINUX_SYMBOL(__entry_text_end) = .;

-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
#define IRQENTRY_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__irqentry_text_start) = .; \
*(.irqentry.text) \
VMLINUX_SYMBOL(__irqentry_text_end) = .;
-#else
-#define IRQENTRY_TEXT
-#endif

-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
#define SOFTIRQENTRY_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__softirqentry_text_start) = .; \
*(.softirqentry.text) \
VMLINUX_SYMBOL(__softirqentry_text_end) = .;
-#else
-#define SOFTIRQENTRY_TEXT
-#endif

/* Section used for early init (in .S files) */
#define HEAD_TEXT *(.head.text)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a2fddddb0d60..59ba11661b6e 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,7 @@
#include <linux/atomic.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
+#include <asm/sections.h>

/*
* These correspond to the IORESOURCE_IRQ_* defines in
@@ -726,7 +727,6 @@ extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
extern int arch_early_irq_init(void);

-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
/*
* We want to know which function is an entrypoint of a hardirq or a softirq.
*/
@@ -734,16 +734,4 @@ extern int arch_early_irq_init(void);
#define __softirq_entry \
__attribute__((__section__(".softirqentry.text")))

-/* Limits of hardirq entrypoints */
-extern char __irqentry_text_start[];
-extern char __irqentry_text_end[];
-/* Limits of softirq entrypoints */
-extern char __softirqentry_text_start[];
-extern char __softirqentry_text_end[];
-
-#else
-#define __irq_entry
-#define __softirq_entry
-#endif
-
#endif
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index b9389d72b2f7..c29e5bc7e9c9 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -91,10 +91,8 @@ static bool in_entry_code(unsigned long ip)
if (addr >= __entry_text_start && addr < __entry_text_end)
return true;

-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
return true;
-#endif

return false;
}
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4dbb336a1fdd..ca0b250eefc4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -761,13 +761,8 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
#endif

/* Make sure APIC interrupt handlers end up in the irqentry section: */
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
-# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
-# define POP_SECTION_IRQENTRY .popsection
-#else
-# define PUSH_SECTION_IRQENTRY
-# define POP_SECTION_IRQENTRY
-#endif
+#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+#define POP_SECTION_IRQENTRY .popsection

.macro apicinterrupt num sym do_sym
PUSH_SECTION_IRQENTRY
--
2.14.2
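
The now-unconditional machinery boils down to a small pattern; a hedged,
kernel-context C sketch (the handler name is invented, the attribute and
section symbols are the real ones this patch exposes unconditionally):

	#include <linux/interrupt.h>	/* __irq_entry */
	#include <asm/sections.h>	/* __irqentry_text_start/_end */

	/* Tag an entry point so the linker places it in .irqentry.text ... */
	static void __irq_entry demo_irq_entry(void)
	{
		/* hardirq entry work would live here */
	}

	/* ... so unwinders and tracers can classify any address by range: */
	static int addr_in_irqentry_text(unsigned long addr)
	{
		return addr >= (unsigned long)__irqentry_text_start &&
		       addr < (unsigned long)__irqentry_text_end;
	}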

@ -1,84 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 14 Aug 2017 22:36:19 -0700
Subject: [PATCH] x86/xen/64: Fix the reported SS and CS in SYSCALL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

When I cleaned up the Xen SYSCALL entries, I inadvertently changed
the reported segment registers. Before my patch, regs->ss was
__USER(32)_DS and regs->cs was __USER(32)_CS. After the patch, they
are FLAT_USER_CS/DS(32).

This had a couple unfortunate effects. It confused the
opportunistic fast return logic. It also significantly increased
the risk of triggering a nasty glibc bug:

https://sourceware.org/bugzilla/show_bug.cgi?id=21269

Update the Xen entry code to change it back.

Reported-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: xen-devel@lists.xenproject.org
Fixes: 8a9949bc71a7 ("x86/xen/64: Rearrange the SYSCALL entries")
Link: http://lkml.kernel.org/r/daba8351ea2764bb30272296ab9ce08a81bd8264.1502775273.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit fa2016a8e7d846b306e431646d250500e1da0c33)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 69a6ef3aeb274efe86fd74771830354f303ccc2f)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/xen/xen-asm_64.S | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index a8a4f4c460a6..c5fee2680abc 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -88,6 +88,15 @@ RELOC(xen_sysret64, 1b+1)
ENTRY(xen_syscall_target)
popq %rcx
popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER_DS and __USER_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER_DS, 4*8(%rsp)
+ movq $__USER_CS, 1*8(%rsp)
+
jmp entry_SYSCALL_64_after_hwframe
ENDPROC(xen_syscall_target)

@@ -97,6 +106,15 @@ ENDPROC(xen_syscall_target)
ENTRY(xen_syscall32_target)
popq %rcx
popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER32_DS and __USER32_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER32_DS, 4*8(%rsp)
+ movq $__USER32_CS, 1*8(%rsp)
+
jmp entry_SYSCALL_compat_after_hwframe
ENDPROC(xen_syscall32_target)

--
2.14.2
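
The two offsets patched above follow from the frame left behind by the
preceding pops; a short illustrative map (word offsets, not kernel code):

	/* after popq %rcx; popq %r11, counting 8-byte words from %rsp: */
	/* 0*8 = RIP, 1*8 = CS, 2*8 = RFLAGS, 3*8 = RSP, 4*8 = SS */

hence CS is rewritten at 1*8(%rsp) and SS at 4*8(%rsp).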

@ -1,360 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 16 Aug 2017 19:31:56 +0200
Subject: [PATCH] x86/paravirt/xen: Remove xen_patch()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Xen's paravirt patch function xen_patch() does some special casing for
irq_ops functions to apply relocations when those functions can be
patched inline instead of calls.

Unfortunately none of the special case function replacements is small
enough to be patched inline, so the special case never applies.

As xen_patch() will call paravirt_patch_default() in all cases it can
be just dropped. xen-asm.h doesn't seem necessary without xen_patch()
as the only thing left in it would be the definition of XEN_EFLAGS_NMI
used only once. So move that definition and remove xen-asm.h.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boris.ostrovsky@oracle.com
Cc: lguest@lists.ozlabs.org
Cc: rusty@rustcorp.com.au
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20170816173157.8633-2-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit edcb5cf84f05e5d2e2af25422a72ccde359fcca9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit c96c9c712136a9e24a7aaf0aac4c149eee01bd8e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/xen/xen-asm.h | 12 ---------
arch/x86/xen/xen-ops.h | 15 +++---------
arch/x86/xen/enlighten_pv.c | 59 +--------------------------------------------
arch/x86/xen/xen-asm.S | 26 +++++---------------
arch/x86/xen/xen-asm_32.S | 27 ++++-----------------
arch/x86/xen/xen-asm_64.S | 20 ++++-----------
6 files changed, 21 insertions(+), 138 deletions(-)
delete mode 100644 arch/x86/xen/xen-asm.h

diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h
deleted file mode 100644
index 465276467a47..000000000000
--- a/arch/x86/xen/xen-asm.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _XEN_XEN_ASM_H
-#define _XEN_XEN_ASM_H
-
-#include <linux/linkage.h>
-
-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x) .globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
-
-#endif
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d5004477db6..70301ac0d414 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -129,17 +129,10 @@ static inline void __init xen_efi_init(void)
}
#endif

-/* Declare an asm function, along with symbols needed to make it
- inlineable */
-#define DECL_ASM(ret, name, ...) \
- __visible ret name(__VA_ARGS__); \
- extern char name##_end[] __visible; \
- extern char name##_reloc[] __visible
-
-DECL_ASM(void, xen_irq_enable_direct, void);
-DECL_ASM(void, xen_irq_disable_direct, void);
-DECL_ASM(unsigned long, xen_save_fl_direct, void);
-DECL_ASM(void, xen_restore_fl_direct, unsigned long);
+__visible void xen_irq_enable_direct(void);
+__visible void xen_irq_disable_direct(void);
+__visible unsigned long xen_save_fl_direct(void);
+__visible void xen_restore_fl_direct(unsigned long);

/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index df1921751aa5..6c279c8f0a0e 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -988,59 +988,6 @@ void __ref xen_setup_vcpu_info_placement(void)
}
}

-static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
- unsigned long addr, unsigned len)
-{
- char *start, *end, *reloc;
- unsigned ret;
-
- start = end = reloc = NULL;
-
-#define SITE(op, x) \
- case PARAVIRT_PATCH(op.x): \
- if (xen_have_vcpu_info_placement) { \
- start = (char *)xen_##x##_direct; \
- end = xen_##x##_direct_end; \
- reloc = xen_##x##_direct_reloc; \
- } \
- goto patch_site
-
- switch (type) {
- SITE(pv_irq_ops, irq_enable);
- SITE(pv_irq_ops, irq_disable);
- SITE(pv_irq_ops, save_fl);
- SITE(pv_irq_ops, restore_fl);
-#undef SITE
-
- patch_site:
- if (start == NULL || (end-start) > len)
- goto default_patch;
-
- ret = paravirt_patch_insns(insnbuf, len, start, end);
-
- /* Note: because reloc is assigned from something that
- appears to be an array, gcc assumes it's non-null,
- but doesn't know its relationship with start and
- end. */
- if (reloc > start && reloc < end) {
- int reloc_off = reloc - start;
- long *relocp = (long *)(insnbuf + reloc_off);
- long delta = start - (char *)addr;
-
- *relocp += delta;
- }
- break;
-
- default_patch:
- default:
- ret = paravirt_patch_default(type, clobbers, insnbuf,
- addr, len);
- break;
- }
-
- return ret;
-}
-
static const struct pv_info xen_info __initconst = {
.shared_kernel_pmd = 0,

@@ -1050,10 +997,6 @@ static const struct pv_info xen_info __initconst = {
.name = "Xen",
};

-static const struct pv_init_ops xen_init_ops __initconst = {
- .patch = xen_patch,
-};
-
static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.cpuid = xen_cpuid,

@@ -1251,7 +1194,7 @@ asmlinkage __visible void __init xen_start_kernel(void)

/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_init_ops = xen_init_ops;
+ pv_init_ops.patch = paravirt_patch_default;
pv_cpu_ops = xen_cpu_ops;

x86_platform.get_nmi_reason = xen_get_nmi_reason;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index eff224df813f..dcd31fa39b5d 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in percpu data) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/

#include <asm/asm-offsets.h>
@@ -16,7 +10,7 @@
#include <asm/processor-flags.h>
#include <asm/frame.h>

-#include "xen-asm.h"
+#include <linux/linkage.h>

/*
* Enable events. This clears the event mask and tests the pending
@@ -38,13 +32,11 @@ ENTRY(xen_irq_enable_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jz 1f

-2: call check_events
+ call check_events
1:
-ENDPATCH(xen_irq_enable_direct)
FRAME_END
ret
ENDPROC(xen_irq_enable_direct)
- RELOC(xen_irq_enable_direct, 2b+1)


/*
@@ -53,10 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
*/
ENTRY(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
ret
- ENDPROC(xen_irq_disable_direct)
- RELOC(xen_irq_disable_direct, 0)
+ENDPROC(xen_irq_disable_direct)

/*
* (xen_)save_fl is used to get the current interrupt enable status.
@@ -71,10 +61,8 @@ ENTRY(xen_save_fl_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah
addb %ah, %ah
-ENDPATCH(xen_save_fl_direct)
ret
ENDPROC(xen_save_fl_direct)
- RELOC(xen_save_fl_direct, 0)


/*
@@ -101,13 +89,11 @@ ENTRY(xen_restore_fl_direct)
/* check for unmasked and pending */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jnz 1f
-2: call check_events
+ call check_events
1:
-ENDPATCH(xen_restore_fl_direct)
FRAME_END
ret
ENDPROC(xen_restore_fl_direct)
- RELOC(xen_restore_fl_direct, 2b+1)


/*
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index feb6d40a0860..1200e262a116 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/

#include <asm/thread_info.h>
@@ -18,21 +12,10 @@

#include <xen/interface/xen.h>

-#include "xen-asm.h"
+#include <linux/linkage.h>

-/*
- * Force an event check by making a hypercall, but preserve regs
- * before making the call.
- */
-check_events:
- push %eax
- push %ecx
- push %edx
- call xen_force_evtchn_callback
- pop %edx
- pop %ecx
- pop %eax
- ret
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI 0x80000000

/*
* This is run where a normal iret would be run, with the same stack setup:
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c5fee2680abc..3a3b6a211584 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/

#include <asm/errno.h>
@@ -20,7 +14,7 @@

#include <xen/interface/xen.h>

-#include "xen-asm.h"
+#include <linux/linkage.h>

ENTRY(xen_adjust_exception_frame)
mov 8+0(%rsp), %rcx
@@ -46,9 +40,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
*/
ENTRY(xen_iret)
pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_iret)
-RELOC(xen_iret, 1b+1)
+ jmp hypercall_iret

ENTRY(xen_sysret64)
/*
@@ -65,9 +57,7 @@ ENTRY(xen_sysret64)
pushq %rcx

pushq $VGCF_in_syscall
-1: jmp hypercall_iret
-ENDPATCH(xen_sysret64)
-RELOC(xen_sysret64, 1b+1)
+ jmp hypercall_iret

/*
* Xen handles syscall callbacks much like ordinary exceptions, which
--
2.14.2
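
The special case that never fired is easy to model in isolation; a
self-contained C sketch of the "inline only if the replacement fits" rule
(names are invented, only the decision logic mirrors what xen_patch() did):

	#include <string.h>

	/* Default: leave the indirect call to the out-of-line helper alone. */
	static unsigned int patch_default(void *site, unsigned int len)
	{
		(void)site;
		return len;	/* call site kept as-is */
	}

	/* Inline-patch only when the replacement fits into the call site. */
	static unsigned int patch_site(void *site, unsigned int len,
				       const void *repl, unsigned int repl_len)
	{
		if (repl == NULL || repl_len > len)	/* always true for Xen's helpers */
			return patch_default(site, len);
		memcpy(site, repl, repl_len);		/* copy replacement inline */
		return repl_len;
	}

Since every xen_*_direct replacement is larger than its call site, the first
branch is always taken, which is exactly why the function could be dropped in
favor of paravirt_patch_default.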

@ -1,218 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Aug 2017 08:47:22 +0200
Subject: [PATCH] x86/traps: Simplify pagefault tracing logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Make use of the new irqvector tracing static key and remove the duplicated
trace_do_pagefault() implementation.

If irq vector tracing is disabled, then the overhead of this is a single
NOP5, which is a reasonable tradeoff to avoid duplicated code and the
unholy macro mess.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170828064956.672965407@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 11a7ffb01703c3bbb1e9b968893f4487a1b0b5a8)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8478bb5608747fd64c9fd4a2f5422fb4af756a50)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/traps.h | 10 +--------
arch/x86/kernel/kvm.c | 2 +-
arch/x86/mm/fault.c | 49 ++++++++++++--------------------------------
arch/x86/entry/entry_32.S | 8 --------
arch/x86/entry/entry_64.S | 13 +-----------
5 files changed, 16 insertions(+), 66 deletions(-)

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7f48cd..b4f322d6c95f 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -39,7 +39,6 @@ asmlinkage void machine_check(void);
asmlinkage void simd_coprocessor_error(void);

#ifdef CONFIG_TRACING
-asmlinkage void trace_page_fault(void);
#define trace_stack_segment stack_segment
#define trace_divide_error divide_error
#define trace_bounds bounds
@@ -54,6 +53,7 @@ asmlinkage void trace_page_fault(void);
#define trace_alignment_check alignment_check
#define trace_simd_coprocessor_error simd_coprocessor_error
#define trace_async_page_fault async_page_fault
+#define trace_page_fault page_fault
#endif

dotraplinkage void do_divide_error(struct pt_regs *, long);
@@ -74,14 +74,6 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
-#ifdef CONFIG_TRACING
-dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
-#else
-static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
-{
- do_page_fault(regs, error);
-}
-#endif
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
dotraplinkage void do_alignment_check(struct pt_regs *, long);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e5e4306e4546..9e3798b00e40 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -270,7 +270,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)

switch (kvm_read_and_reset_pf_reason()) {
default:
- trace_do_page_fault(regs, error_code);
+ do_page_fault(regs, error_code);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 955be01dd9cc..4ee9eb916826 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1253,10 +1253,6 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
- *
- * This function must have noinline because both callers
- * {,trace_}do_page_fault() have notrace on. Having this an actual function
- * guarantees there's a function trace entry.
*/
static noinline void
__do_page_fault(struct pt_regs *regs, unsigned long error_code,
@@ -1491,27 +1487,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
}
NOKPROBE_SYMBOL(__do_page_fault);

-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
-{
- unsigned long address = read_cr2(); /* Get the faulting address */
- enum ctx_state prev_state;
-
- /*
- * We must have this function tagged with __kprobes, notrace and call
- * read_cr2() before calling anything else. To avoid calling any kind
- * of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contain all sorts of tracepoints.
- */
-
- prev_state = exception_enter();
- __do_page_fault(regs, error_code, address);
- exception_exit(prev_state);
-}
-NOKPROBE_SYMBOL(do_page_fault);
-
-#ifdef CONFIG_TRACING
static nokprobe_inline void
trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
@@ -1522,22 +1497,24 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
trace_page_fault_kernel(address, regs, error_code);
}

+/*
+ * We must have this function blacklisted from kprobes, tagged with notrace
+ * and call read_cr2() before calling anything else. To avoid calling any
+ * kind of tracing machinery before we've observed the CR2 value.
+ *
+ * exception_{enter,exit}() contains all sorts of tracepoints.
+ */
dotraplinkage void notrace
-trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
- /*
- * The exception_enter and tracepoint processing could
- * trigger another page faults (user space callchain
- * reading) and destroy the original cr2 value, so read
- * the faulting address now.
- */
- unsigned long address = read_cr2();
+ unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;

prev_state = exception_enter();
- trace_page_fault_entries(address, regs, error_code);
+ if (trace_irqvectors_enabled())
+ trace_page_fault_entries(address, regs, error_code);
+
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
-NOKPROBE_SYMBOL(trace_do_page_fault);
-#endif /* CONFIG_TRACING */
+NOKPROBE_SYMBOL(do_page_fault);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 48ef7bb32c42..0092da1c056f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -891,14 +891,6 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,

#endif /* CONFIG_HYPERV */

-#ifdef CONFIG_TRACING
-ENTRY(trace_page_fault)
- ASM_CLAC
- pushl $trace_do_page_fault
- jmp common_exception
-END(trace_page_fault)
-#endif
-
ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ca0b250eefc4..dfabcbf8e813 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -913,17 +913,6 @@ ENTRY(\sym)
END(\sym)
.endm

-#ifdef CONFIG_TRACING
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#else
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#endif
-
idtentry divide_error do_divide_error has_error_code=0
idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
@@ -1091,7 +1080,7 @@ idtentry xen_stack_segment do_stack_segment has_error_code=1
#endif

idtentry general_protection do_general_protection has_error_code=1
-trace_idtentry page_fault do_page_fault has_error_code=1
+idtentry page_fault do_page_fault has_error_code=1

#ifdef CONFIG_KVM_GUEST
idtentry async_page_fault do_async_page_fault has_error_code=1
--
2.14.2
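
The resulting handler follows the usual static-key pattern: capture the
volatile state first, then branch over the tracing. A stand-alone C model
with userspace stand-ins (none of these are the real kernel functions):

	#include <stdio.h>

	static int trace_enabled;	/* models trace_irqvectors_enabled() */

	static unsigned long read_fault_address(void)
	{
		return 0x1000;		/* models read_cr2() */
	}

	static void handle_fault(unsigned long error_code)
	{
		/*
		 * Read the address before any tracing runs: a tracepoint may
		 * fault itself (user callchain reads) and clobber the source.
		 */
		unsigned long address = read_fault_address();

		if (trace_enabled)	/* a single patched NOP5 in the kernel */
			printf("trace: addr=%#lx err=%#lx\n", address, error_code);

		/* ... real fault handling with (address, error_code) ... */
	}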

@ -1,262 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Aug 2017 08:47:37 +0200
Subject: [PATCH] x86/idt: Unify gate_struct handling for 32/64-bit kernels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The first 32 bits of gate struct are the same for 32 and 64 bit kernels.

The 32-bit version uses desc_struct and no designated data structure,
so we need different accessors for 32 and 64 bit kernels.

Aside of that the macros which are necessary to build the 32-bit
gate descriptor are horrible to read.

Unify the gate structs and switch all code fiddling with it over.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170828064957.861974317@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 64b163fab684e3de47aa8db6cc08ae7d2e194373)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 587719b1926757eb7531e0631d63fb93cd60d0d3)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/desc.h | 45 ++++++++++++++-----------------
arch/x86/include/asm/desc_defs.h | 57 ++++++++++++++++++++++++++--------------
arch/x86/kvm/vmx.c | 2 +-
arch/x86/xen/enlighten_pv.c | 12 ++++-----
4 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d0a21b12dd58..57e502a4e92f 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -83,33 +83,25 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
}

-#ifdef CONFIG_X86_64
-
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
unsigned dpl, unsigned ist, unsigned seg)
{
- gate->offset_low = PTR_LOW(func);
+ gate->offset_low = (u16) func;
+ gate->bits.p = 1;
+ gate->bits.dpl = dpl;
+ gate->bits.zero = 0;
+ gate->bits.type = type;
+ gate->offset_middle = (u16) (func >> 16);
+#ifdef CONFIG_X86_64
gate->segment = __KERNEL_CS;
- gate->ist = ist;
- gate->p = 1;
- gate->dpl = dpl;
- gate->zero0 = 0;
- gate->zero1 = 0;
- gate->type = type;
- gate->offset_middle = PTR_MIDDLE(func);
- gate->offset_high = PTR_HIGH(func);
-}
-
+ gate->bits.ist = ist;
+ gate->reserved = 0;
+ gate->offset_high = (u32) (func >> 32);
#else
-static inline void pack_gate(gate_desc *gate, unsigned char type,
- unsigned long base, unsigned dpl, unsigned flags,
- unsigned short seg)
-{
- gate->a = (seg << 16) | (base & 0xffff);
- gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
-}
-
+ gate->segment = seg;
+ gate->bits.ist = 0;
#endif
+}

static inline int desc_empty(const void *ptr)
{
@@ -185,7 +177,8 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
}


-static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
+static inline void set_tssldt_descriptor(void *d, unsigned long addr,
+ unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
struct ldttss_desc64 *desc = d;
@@ -193,13 +186,13 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
memset(desc, 0, sizeof(*desc));

desc->limit0 = size & 0xFFFF;
- desc->base0 = PTR_LOW(addr);
- desc->base1 = PTR_MIDDLE(addr) & 0xFF;
+ desc->base0 = (u16) addr;
+ desc->base1 = (addr >> 16) & 0xFF;
desc->type = type;
desc->p = 1;
desc->limit1 = (size >> 16) & 0xF;
- desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
- desc->base3 = PTR_HIGH(addr);
+ desc->base2 = (addr >> 24) & 0xFF;
+ desc->base3 = (u32) (addr >> 32);
#else
pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 49265345d4d2..d684bee8a59a 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -47,20 +47,6 @@ enum {
GATE_TASK = 0x5,
};

-/* 16byte gate */
-struct gate_struct64 {
- u16 offset_low;
- u16 segment;
- unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
- u16 offset_middle;
- u32 offset_high;
- u32 zero1;
-} __attribute__((packed));
-
-#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)
-
enum {
DESC_TSS = 0x9,
DESC_LDT = 0x2,
@@ -77,20 +63,51 @@ struct ldttss_desc64 {
u32 zero1;
} __attribute__((packed));

+
#ifdef CONFIG_X86_64
-typedef struct gate_struct64 gate_desc;
typedef struct ldttss_desc64 ldt_desc;
typedef struct ldttss_desc64 tss_desc;
-#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32))
-#define gate_segment(g) ((g).segment)
#else
-typedef struct desc_struct gate_desc;
typedef struct desc_struct ldt_desc;
typedef struct desc_struct tss_desc;
-#define gate_offset(g) (((g).b & 0xffff0000) | ((g).a & 0x0000ffff))
-#define gate_segment(g) ((g).a >> 16)
#endif

+struct idt_bits {
+ u16 ist : 3,
+ zero : 5,
+ type : 5,
+ dpl : 2,
+ p : 1;
+} __attribute__((packed));
+
+struct gate_struct {
+ u16 offset_low;
+ u16 segment;
+ struct idt_bits bits;
+ u16 offset_middle;
+#ifdef CONFIG_X86_64
+ u32 offset_high;
+ u32 reserved;
+#endif
+} __attribute__((packed));
+
+typedef struct gate_struct gate_desc;
+
+static inline unsigned long gate_offset(const gate_desc *g)
+{
+#ifdef CONFIG_X86_64
+ return g->offset_low | ((unsigned long)g->offset_middle << 16) |
+ ((unsigned long) g->offset_high << 32);
+#else
+ return g->offset_low | ((unsigned long)g->offset_middle << 16);
+#endif
+}
+
+static inline unsigned long gate_segment(const gate_desc *g)
+{
+ return g->segment;
+}
+
struct desc_ptr {
unsigned short size;
unsigned long address;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a2c95522ac99..7b447d126d17 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8838,7 +8838,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)

vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
desc = (gate_desc *)vmx->host_idt_base + vector;
- entry = gate_offset(*desc);
+ entry = gate_offset(desc);
asm volatile(
#ifdef CONFIG_X86_64
"mov %%" _ASM_SP ", %[sp]\n\t"
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 6c279c8f0a0e..49ee3315b9f7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -591,12 +591,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
{
unsigned long addr;

- if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
+ if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
return 0;

info->vector = vector;

- addr = gate_offset(*val);
+ addr = gate_offset(val);
#ifdef CONFIG_X86_64
/*
* Look for known traps using IST, and substitute them
@@ -629,16 +629,16 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
;
else {
/* Some other trap using IST? */
- if (WARN_ON(val->ist != 0))
+ if (WARN_ON(val->bits.ist != 0))
return 0;
}
#endif /* CONFIG_X86_64 */
info->address = addr;

- info->cs = gate_segment(*val);
- info->flags = val->dpl;
+ info->cs = gate_segment(val);
+ info->flags = val->bits.dpl;
/* interrupt gates clear IF */
- if (val->type == GATE_INTERRUPT)
+ if (val->bits.type == GATE_INTERRUPT)
info->flags |= 1 << 2;

return 1;
--
2.14.2
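
The split/reassemble arithmetic that pack_gate() and gate_offset() implement
can be checked in isolation; a minimal, runnable C sketch (the handler
address is hypothetical):

	#include <stdint.h>

	int main(void)
	{
		uint64_t func = 0xffffffff81234567ULL;	/* hypothetical handler */

		uint16_t offset_low    = (uint16_t)func;
		uint16_t offset_middle = (uint16_t)(func >> 16);
		uint32_t offset_high   = (uint32_t)(func >> 32);

		uint64_t back = offset_low |
				((uint64_t)offset_middle << 16) |
				((uint64_t)offset_high << 32);

		return back == func ? 0 : 1;	/* round-trips exactly */
	}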

@ -1,93 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Aug 2017 08:47:40 +0200
Subject: [PATCH] x86/asm: Replace access to desc_struct:a/b fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The union inside of desc_struct allows access to the raw u32 parts of
the descriptors. This raw access is about to go away.

Replace the few code parts which access those fields.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170828064958.120214366@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 9a98e7780022aa7cd201eb8a88a4f1d607b73cde)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8469c76c61ea9c3b86b596352d1148bace5ea706)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/xen/hypercall.h | 6 ++++--
arch/x86/kernel/tls.c | 2 +-
arch/x86/xen/enlighten_pv.c | 2 +-
3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 11071fcd630e..9606688caa4b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -552,6 +552,8 @@ static inline void
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
{
+ u32 *p = (u32 *) &desc;
+
mcl->op = __HYPERVISOR_update_descriptor;
if (sizeof(maddr) == sizeof(long)) {
mcl->args[0] = maddr;
@@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
} else {
mcl->args[0] = maddr;
mcl->args[1] = maddr >> 32;
- mcl->args[2] = desc.a;
- mcl->args[3] = desc.b;
+ mcl->args[2] = *p++;
+ mcl->args[3] = *p;
}

trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index dcd699baea1b..a106b9719c58 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,7 +93,7 @@ static void set_tls_desc(struct task_struct *p, int idx,

while (n-- > 0) {
if (LDT_empty(info) || LDT_zero(info)) {
- desc->a = desc->b = 0;
+ memset(desc, 0, sizeof(*desc));
} else {
fill_ldt(desc, info);

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 49ee3315b9f7..c76f5ff4d0d7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -501,7 +501,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
static inline bool desc_equal(const struct desc_struct *d1,
const struct desc_struct *d2)
{
- return d1->a == d2->a && d1->b == d2->b;
+ return !memcmp(d1, d2, sizeof(*d1));
}

static void load_TLS_descriptor(struct thread_struct *t,
--
2.14.2
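
The replacement idiom (treating the 8-byte descriptor as two u32 halves)
can be sanity-checked on any little-endian machine, which x86 is; a hedged,
runnable C sketch with an arbitrary example value:

	#include <stdint.h>
	#include <string.h>

	int main(void)
	{
		uint64_t desc = 0x00cf9a000000ffffULL;	/* example GDT-style entry */
		uint32_t p[2];

		memcpy(p, &desc, sizeof(p));	/* p[0] = low half, p[1] = high half */
		return (p[0] == 0x0000ffffu && p[1] == 0x00cf9a00u) ? 0 : 1;
	}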

@ -1,436 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Juergen Gross <jgross@suse.com>
|
||||
Date: Thu, 31 Aug 2017 19:42:49 +0200
|
||||
Subject: [PATCH] x86/xen: Get rid of paravirt op adjust_exception_frame
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
When running as Xen pv-guest the exception frame on the stack contains
|
||||
%r11 and %rcx additional to the other data pushed by the processor.
|
||||
|
||||
Instead of having a paravirt op being called for each exception type
|
||||
prepend the Xen specific code to each exception entry. When running as
|
||||
Xen pv-guest just use the exception entry with prepended instructions,
|
||||
otherwise use the entry without the Xen specific code.
|
||||
|
||||
[ tglx: Merged through tip to avoid ugly merge conflict ]
|
||||
|
||||
Signed-off-by: Juergen Gross <jgross@suse.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: xen-devel@lists.xenproject.org
|
||||
Cc: boris.ostrovsky@oracle.com
|
||||
Cc: luto@amacapital.net
|
||||
Link: http://lkml.kernel.org/r/20170831174249.26853-1-jg@pfupf.net
|
||||
(backported from commit 5878d5d6fdef6447d73b0acc121ba445bef37f53)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 9a6fb927deb3ebbe831741ca82081714637181a7)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/paravirt.h | 5 --
arch/x86/include/asm/paravirt_types.h | 3 --
arch/x86/include/asm/proto.h | 3 ++
arch/x86/include/asm/traps.h | 28 ++++++++--
arch/x86/xen/xen-ops.h | 1 -
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/paravirt.c | 3 --
arch/x86/xen/enlighten_pv.c | 98 +++++++++++++++++++++++------------
arch/x86/xen/irq.c | 3 --
arch/x86/entry/entry_64.S | 23 ++------
arch/x86/entry/entry_64_compat.S | 1 -
arch/x86/xen/xen-asm_64.S | 41 +++++++++++++--
12 files changed, 133 insertions(+), 77 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9ccac1926587..c25dd22f7c70 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -960,11 +960,6 @@ extern void default_banner(void);
#define GET_CR2_INTO_RAX \
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)

-#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
- CLBR_NONE, \
- call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
-
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9ffc36bfe4cd..6b64fc6367f2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -196,9 +196,6 @@ struct pv_irq_ops {
void (*safe_halt)(void);
void (*halt)(void);

-#ifdef CONFIG_X86_64
- void (*adjust_exception_frame)(void);
-#endif
} __no_randomize_layout;

struct pv_mmu_ops {
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 8d3964fc5f91..b408b1886195 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+void xen_entry_INT80_compat(void);
+#endif
#endif

void x86_configure_nx(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index b4f322d6c95f..feb89dbe359d 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,9 +13,6 @@ asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
-asmlinkage void xen_debug(void);
-asmlinkage void xen_int3(void);
-asmlinkage void xen_stack_segment(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
@@ -56,6 +53,31 @@ asmlinkage void simd_coprocessor_error(void);
#define trace_page_fault page_fault
#endif

+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xendebug(void);
+asmlinkage void xen_xenint3(void);
+asmlinkage void xen_nmi(void);
+asmlinkage void xen_overflow(void);
+asmlinkage void xen_bounds(void);
+asmlinkage void xen_invalid_op(void);
+asmlinkage void xen_device_not_available(void);
+asmlinkage void xen_double_fault(void);
+asmlinkage void xen_coprocessor_segment_overrun(void);
+asmlinkage void xen_invalid_TSS(void);
+asmlinkage void xen_segment_not_present(void);
+asmlinkage void xen_stack_segment(void);
+asmlinkage void xen_general_protection(void);
+asmlinkage void xen_page_fault(void);
+asmlinkage void xen_spurious_interrupt_bug(void);
+asmlinkage void xen_coprocessor_error(void);
+asmlinkage void xen_alignment_check(void);
+#ifdef CONFIG_X86_MCE
+asmlinkage void xen_machine_check(void);
+#endif /* CONFIG_X86_MCE */
+asmlinkage void xen_simd_coprocessor_error(void);
+#endif
+
dotraplinkage void do_divide_error(struct pt_regs *, long);
dotraplinkage void do_debug(struct pt_regs *, long);
dotraplinkage void do_nmi(struct pt_regs *, long);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 70301ac0d414..c8a6d224f7ed 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -138,7 +138,6 @@ __visible void xen_restore_fl_direct(unsigned long);
__visible void xen_iret(void);
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
-__visible void xen_adjust_exception_frame(void);

extern int xen_panic_handler_init(void);

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 99332f550c48..cf42206926af 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
int main(void)
{
#ifdef CONFIG_PARAVIRT
- OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bc0a849589bb..a14df9eecfed 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.safe_halt = native_safe_halt,
.halt = native_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = paravirt_nop,
-#endif
};

__visible struct pv_cpu_ops pv_cpu_ops = {
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c76f5ff4d0d7..ae2a2e2d6362 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -586,6 +586,70 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
preempt_enable();
}

+#ifdef CONFIG_X86_64
+struct trap_array_entry {
+ void (*orig)(void);
+ void (*xen)(void);
+ bool ist_okay;
+};
+
+static struct trap_array_entry trap_array[] = {
+ { debug, xen_xendebug, true },
+ { int3, xen_xenint3, true },
+ { double_fault, xen_double_fault, true },
+#ifdef CONFIG_X86_MCE
+ { machine_check, xen_machine_check, true },
+#endif
+ { nmi, xen_nmi, true },
+ { overflow, xen_overflow, false },
+#ifdef CONFIG_IA32_EMULATION
+ { entry_INT80_compat, xen_entry_INT80_compat, false },
+#endif
+ { page_fault, xen_page_fault, false },
+ { divide_error, xen_divide_error, false },
+ { bounds, xen_bounds, false },
+ { invalid_op, xen_invalid_op, false },
+ { device_not_available, xen_device_not_available, false },
+ { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
+ { invalid_TSS, xen_invalid_TSS, false },
+ { segment_not_present, xen_segment_not_present, false },
+ { stack_segment, xen_stack_segment, false },
+ { general_protection, xen_general_protection, false },
+ { spurious_interrupt_bug, xen_spurious_interrupt_bug, false },
+ { coprocessor_error, xen_coprocessor_error, false },
+ { alignment_check, xen_alignment_check, false },
+ { simd_coprocessor_error, xen_simd_coprocessor_error, false },
+};
+
+static bool get_trap_addr(void **addr, unsigned int ist)
+{
+ unsigned int nr;
+ bool ist_okay = false;
+
+ /*
+ * Replace trap handler addresses by Xen specific ones.
+ * Check for known traps using IST and whitelist them.
+ * The debugger ones are the only ones we care about.
+ * Xen will handle faults like double_fault, so we should never see
+ * them. Warn if there's an unexpected IST-using fault handler.
+ */
+ for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
+ struct trap_array_entry *entry = trap_array + nr;
+
+ if (*addr == entry->orig) {
+ *addr = entry->xen;
+ ist_okay = entry->ist_okay;
+ break;
+ }
+ }
+
+ if (WARN_ON(ist != 0 && !ist_okay))
+ return false;
+
+ return true;
+}
+#endif
+
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
{
@@ -598,40 +662,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,

addr = gate_offset(val);
#ifdef CONFIG_X86_64
- /*
- * Look for known traps using IST, and substitute them
- * appropriately. The debugger ones are the only ones we care
- * about. Xen will handle faults like double_fault,
- * so we should never see them. Warn if
- * there's an unexpected IST-using fault handler.
- */
- if (addr == (unsigned long)debug)
- addr = (unsigned long)xen_debug;
- else if (addr == (unsigned long)int3)
- addr = (unsigned long)xen_int3;
- else if (addr == (unsigned long)stack_segment)
- addr = (unsigned long)xen_stack_segment;
- else if (addr == (unsigned long)double_fault) {
- /* Don't need to handle these */
+ if (!get_trap_addr((void **)&addr, val->bits.ist))
return 0;
-#ifdef CONFIG_X86_MCE
- } else if (addr == (unsigned long)machine_check) {
- /*
- * when xen hypervisor inject vMCE to guest,
- * use native mce handler to handle it
- */
- ;
-#endif
- } else if (addr == (unsigned long)nmi)
- /*
- * Use the native version as well.
- */
- ;
- else {
- /* Some other trap using IST? */
- if (WARN_ON(val->bits.ist != 0))
- return 0;
- }
#endif /* CONFIG_X86_64 */
info->address = addr;

diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 33e92955e09d..d4eff5676cfa 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {

.safe_halt = xen_safe_halt,
.halt = xen_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = xen_adjust_exception_frame,
-#endif
};

void __init xen_init_irq_ops(void)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index dfabcbf8e813..c12260ef3e4b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -829,7 +829,6 @@ ENTRY(\sym)
.endif

ASM_CLAC
- PARAVIRT_ADJUST_EXCEPTION_FRAME

.ifeq \has_error_code
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -975,7 +974,7 @@ ENTRY(do_softirq_own_stack)
ENDPROC(do_softirq_own_stack)

#ifdef CONFIG_XEN
-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0

/*
* A note on the "critical region" in our callback handler.
@@ -1042,8 +1041,6 @@ ENTRY(xen_failsafe_callback)
movq 8(%rsp), %r11
addq $0x30, %rsp
pushq $0 /* RIP */
- pushq %r11
- pushq %rcx
UNWIND_HINT_IRET_REGS offset=8
jmp general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1074,9 +1071,8 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1

#ifdef CONFIG_XEN
-idtentry xen_debug do_debug has_error_code=0
-idtentry xen_int3 do_int3 has_error_code=0
-idtentry xen_stack_segment do_stack_segment has_error_code=1
+idtentry xendebug do_debug has_error_code=0
+idtentry xenint3 do_int3 has_error_code=0
#endif

idtentry general_protection do_general_protection has_error_code=1
@@ -1240,20 +1236,9 @@ ENTRY(error_exit)
END(error_exit)

/* Runs on exception stack */
+/* XXX: broken on Xen PV */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
- /*
- * Fix up the exception frame if we're on Xen.
- * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
- * one value to the stack on native, so it may clobber the rdx
- * scratch slot, but it won't clobber any of the important
- * slots past it.
- *
- * Xen is a different story, because the Xen frame itself overlaps
- * the "NMI executing" variable.
- */
- PARAVIRT_ADJUST_EXCEPTION_FRAME
-
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 5314d7b8e5ad..d8468ba24be0 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -293,7 +293,6 @@ ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
*/
- PARAVIRT_ADJUST_EXCEPTION_FRAME
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS

diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 3a3b6a211584..dae2cc33afb5 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -16,11 +16,42 @@

#include <linux/linkage.h>

-ENTRY(xen_adjust_exception_frame)
- mov 8+0(%rsp), %rcx
- mov 8+8(%rsp), %r11
- ret $16
-ENDPROC(xen_adjust_exception_frame)
+.macro xen_pv_trap name
+ENTRY(xen_\name)
+ pop %rcx
+ pop %r11
+ jmp \name
+END(xen_\name)
+.endm
+
+xen_pv_trap divide_error
+xen_pv_trap debug
+xen_pv_trap xendebug
+xen_pv_trap int3
+xen_pv_trap xenint3
+xen_pv_trap nmi
+xen_pv_trap overflow
+xen_pv_trap bounds
+xen_pv_trap invalid_op
+xen_pv_trap device_not_available
+xen_pv_trap double_fault
+xen_pv_trap coprocessor_segment_overrun
+xen_pv_trap invalid_TSS
+xen_pv_trap segment_not_present
+xen_pv_trap stack_segment
+xen_pv_trap general_protection
+xen_pv_trap page_fault
+xen_pv_trap spurious_interrupt_bug
+xen_pv_trap coprocessor_error
+xen_pv_trap alignment_check
+#ifdef CONFIG_X86_MCE
+xen_pv_trap machine_check
+#endif /* CONFIG_X86_MCE */
+xen_pv_trap simd_coprocessor_error
+#ifdef CONFIG_IA32_EMULATION
+xen_pv_trap entry_INT80_compat
+#endif
+xen_pv_trap hypervisor_callback

hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
--
2.14.2

@ -1,390 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 4 Sep 2017 12:25:27 +0200
Subject: [PATCH] x86/paravirt: Remove no longer used paravirt functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

With removal of lguest some of the paravirt functions are no longer
needed:

->read_cr4()
->store_idt()
->set_pmd_at()
->set_pud_at()
->pte_update()

Remove them.

Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akataria@vmware.com
Cc: boris.ostrovsky@oracle.com
Cc: chrisw@sous-sol.org
Cc: jeremy@goop.org
Cc: rusty@rustcorp.com.au
Cc: virtualization@lists.linux-foundation.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20170904102527.25409-1-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 87930019c713873a1c3b9bd55dde46e81f70c8f1)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit edf3ab0080a6e79a300753e66929b0b7499eaec5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/desc.h | 3 +--
arch/x86/include/asm/paravirt.h | 37 -----------------------------------
arch/x86/include/asm/paravirt_types.h | 9 ---------
arch/x86/include/asm/pgtable.h | 27 ++++---------------------
arch/x86/include/asm/special_insns.h | 10 +++++-----
arch/x86/kernel/paravirt.c | 5 -----
arch/x86/kvm/vmx.c | 2 +-
arch/x86/mm/pgtable.c | 7 +------
arch/x86/xen/enlighten_pv.c | 2 --
arch/x86/xen/mmu_pv.c | 2 --
10 files changed, 12 insertions(+), 92 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 57e502a4e92f..f995e5a09136 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -120,7 +120,6 @@ static inline int desc_empty(const void *ptr)
#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))

#define store_gdt(dtr) native_store_gdt(dtr)
-#define store_idt(dtr) native_store_idt(dtr)
#define store_tr(tr) (tr = native_store_tr())

#define load_TLS(t, cpu) native_load_tls(t, cpu)
@@ -241,7 +240,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
asm volatile("sgdt %0":"=m" (*dtr));
}

-static inline void native_store_idt(struct desc_ptr *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
{
asm volatile("sidt %0":"=m" (*dtr));
}
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25dd22f7c70..12deec722cf0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

-static inline unsigned long __read_cr4(void)
-{
- return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
-}
-
static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
-static inline void store_idt(struct desc_ptr *dtr)
-{
- PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
-}
static inline unsigned long paravirt_store_tr(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
}

-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
-}
-
static inline pte_t __pte(pteval_t val)
{
pteval_t ret;
@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}

-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
-{
- if (sizeof(pmdval_t) > sizeof(long))
- /* 5 arg words */
- pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
- else
- PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
- native_pmd_val(pmd));
-}
-
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
-{
- if (sizeof(pudval_t) > sizeof(long))
- /* 5 arg words */
- pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
- else
- PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
- native_pud_val(pud));
-}
-
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 6b64fc6367f2..42873edd9f9d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -107,7 +107,6 @@ struct pv_cpu_ops {
unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long);

- unsigned long (*read_cr4)(void);
void (*write_cr4)(unsigned long);

#ifdef CONFIG_X86_64
@@ -119,8 +118,6 @@ struct pv_cpu_ops {
void (*load_tr_desc)(void);
void (*load_gdt)(const struct desc_ptr *);
void (*load_idt)(const struct desc_ptr *);
- /* store_gdt has been removed. */
- void (*store_idt)(struct desc_ptr *);
void (*set_ldt)(const void *desc, unsigned entries);
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -245,12 +242,6 @@ struct pv_mmu_ops {
void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval);
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
- void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmdval);
- void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pudval);
- void (*pte_update)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);

pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 77037b6f1caa..bb8e9ea7deb4 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -43,8 +43,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#else /* !CONFIG_PARAVIRT */
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
-#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
-#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)

#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
@@ -75,8 +73,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
#define pmd_clear(pmd) native_pmd_clear(pmd)

-#define pte_update(mm, addr, ptep) do { } while (0)
-
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)

@@ -965,31 +961,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
native_set_pte(ptep, pte);
}

-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp , pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
{
native_set_pmd(pmdp, pmd);
}

-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
{
native_set_pud(pudp, pud);
}

-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces. It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.
- */
-#define pte_update(mm, addr, ptep) do { } while (0)
-#endif
-
/*
* We only update the dirty/accessed state if we set
* the dirty bit by hand in the kernel, since the hardware
@@ -1017,7 +1000,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
- pte_update(mm, addr, ptep);
return pte;
}

@@ -1044,7 +1026,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
- pte_update(mm, addr, ptep);
}

#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 9efaabf5b54b..a24dfcf79f4a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)

extern asmlinkage void native_load_gs_index(unsigned);

+static inline unsigned long __read_cr4(void)
+{
+ return native_read_cr4();
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
native_write_cr3(x);
}

-static inline unsigned long __read_cr4(void)
-{
- return native_read_cr4();
-}
-
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a14df9eecfed..19a3e8f961c7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.set_debugreg = native_set_debugreg,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
- .read_cr4 = native_read_cr4,
.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
.read_cr8 = native_read_cr8,
@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
.load_idt = native_load_idt,
- .store_idt = native_store_idt,
.store_tr = native_store_tr,
.load_tls = native_load_tls,
#ifdef CONFIG_X86_64
@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.set_pte = native_set_pte,
.set_pte_at = native_set_pte_at,
.set_pmd = native_set_pmd,
- .set_pmd_at = native_set_pmd_at,
- .pte_update = paravirt_nop,

.ptep_modify_prot_start = __ptep_modify_prot_start,
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
- .set_pud_at = native_set_pud_at,

.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7b447d126d17..dd4996a96c71 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5174,7 +5174,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */

- native_store_idt(&dt);
+ store_idt(&dt);
vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
vmx->host_idt_base = dt.address;

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 508a708eb9a6..942391b5b639 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
{
int changed = !pte_same(*ptep, entry);

- if (changed && dirty) {
+ if (changed && dirty)
*ptep = entry;
- pte_update(vma->vm_mm, address, ptep);
- }

return changed;
}
@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
(unsigned long *) &ptep->pte);

- if (ret)
- pte_update(vma->vm_mm, addr, ptep);
-
return ret;
}

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae2a2e2d6362..69b9deff7e5c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_cr0 = xen_read_cr0,
.write_cr0 = xen_write_cr0,

- .read_cr4 = native_read_cr4,
.write_cr4 = xen_write_cr4,

#ifdef CONFIG_X86_64
@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,

- .store_idt = native_store_idt,
.store_tr = xen_store_tr,

.write_ldt_entry = xen_write_ldt_entry,
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index cab28cf2cffb..5f61b7e2e6b2 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2430,8 +2430,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.flush_tlb_single = xen_flush_tlb_single,
.flush_tlb_others = xen_flush_tlb_others,

- .pte_update = paravirt_nop,
-
.pgd_alloc = xen_pgd_alloc,
.pgd_free = xen_pgd_free,

--
2.14.2

@ -1,53 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 20 Oct 2017 11:21:33 -0500
Subject: [PATCH] x86/entry: Fix idtentry unwind hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This fixes the following ORC warning in the 'int3' entry code:

WARNING: can't dereference iret registers at ffff8801c5f17fe0 for ip ffffffff95f0d94b

The ORC metadata had the wrong stack offset for the iret registers.

Their location on the stack is dependent on whether the exception has an
error code.

Reported-and-tested-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations")
Link: http://lkml.kernel.org/r/931d57f0551ed7979d5e7e05370d445c8e5137f8.1508516398.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 98990a33b77dda9babf91cb235654f6729e5702e)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 266be2a5053230f6d0b6f27d3e8e9f28df40dd7e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c12260ef3e4b..2e4fc6425f47 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -821,7 +821,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt

.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
- UNWIND_HINT_IRET_REGS offset=8
+ UNWIND_HINT_IRET_REGS offset=\has_error_code*8

/* Sanity check */
.if \shift_ist != -1 && \paranoid == 0
--
2.14.2

@ -1,237 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 10 Sep 2017 17:48:27 -0700
Subject: [PATCH] x86/mm/64: Initialize CR4.PCIDE early
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

cpu_init() is weird: it's called rather late (after early
identification and after most MMU state is initialized) on the boot
CPU but is called extremely early (before identification) on secondary
CPUs. It's called just late enough on the boot CPU that its CR4 value
isn't propagated to mmu_cr4_features.

Even if we put CR4.PCIDE into mmu_cr4_features, we'd hit two
problems. First, we'd crash in the trampoline code. That's
fixable, and I tried that. It turns out that mmu_cr4_features is
totally ignored by secondary_start_64(), though, so even with the
trampoline code fixed, it wouldn't help.

This means that we don't currently have CR4.PCIDE reliably initialized
before we start playing with cpu_tlbstate. This is very fragile and
tends to cause boot failures if I make even small changes to the TLB
handling code.

Make it more robust: initialize CR4.PCIDE earlier on the boot CPU
and propagate it to secondary CPUs in start_secondary().

( Yes, this is ugly. I think we should have improved mmu_cr4_features
to actually control CR4 during secondary bootup, but that would be
fairly intrusive at this stage. )

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reported-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Tested-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit c7ad5ad297e644601747d6dbee978bf85e14f7bc)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0e6a37a43aa876327e7d21881c09977da2d5c270)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/common.c | 49 +++++++-------------------------------------
arch/x86/kernel/setup.c | 5 ++++-
arch/x86/kernel/smpboot.c | 8 +++++---
arch/x86/mm/init.c | 34 ++++++++++++++++++++++++++++++
4 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0b80ed14ff52..4be7b209a3d6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
__setup("nompx", x86_mpx_setup);

#ifdef CONFIG_X86_64
-static int __init x86_pcid_setup(char *s)
+static int __init x86_nopcid_setup(char *s)
{
- /* require an exact match without trailing characters */
- if (strlen(s))
- return 0;
+ /* nopcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;

/* do not emit a message if the feature is not present */
if (!boot_cpu_has(X86_FEATURE_PCID))
- return 1;
+ return 0;

setup_clear_cpu_cap(X86_FEATURE_PCID);
pr_info("nopcid: PCID feature disabled\n");
- return 1;
+ return 0;
}
-__setup("nopcid", x86_pcid_setup);
+early_param("nopcid", x86_nopcid_setup);
#endif

static int __init x86_noinvpcid_setup(char *s)
@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}

-static void setup_pcid(struct cpuinfo_x86 *c)
-{
- if (cpu_has(c, X86_FEATURE_PCID)) {
- if (cpu_has(c, X86_FEATURE_PGE)) {
- /*
- * We'd like to use cr4_set_bits_and_update_boot(),
- * but we can't. CR4.PCIDE is special and can only
- * be set in long mode, and the early CPU init code
- * doesn't know this and would try to restore CR4.PCIDE
- * prior to entering long mode.
- *
- * Instead, we rely on the fact that hotplug, resume,
- * etc all fully restore CR4 before they write anything
- * that could have nonzero PCID bits to CR3. CR4.PCIDE
- * has no effect on the page tables themselves, so we
- * don't need it to be restored early.
- */
- cr4_set_bits(X86_CR4_PCIDE);
- } else {
- /*
- * flush_tlb_all(), as currently implemented, won't
- * work if PCID is on but PGE is not. Since that
- * combination doesn't exist on real hardware, there's
- * no reason to try to fully support it, but it's
- * polite to avoid corrupting data if we're on
- * an improperly configured VM.
- */
- clear_cpu_cap(c, X86_FEATURE_PCID);
- }
- }
-}
-
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smep(c);
setup_smap(c);

- /* Set up PCID */
- setup_pcid(c);
-
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d7e8b983aa72..f964bfddfefd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1174,8 +1174,11 @@ void __init setup_arch(char **cmdline_p)
* with the current CR4 value. This may not be necessary, but
* auditing all the early-boot CR4 manipulation would be needed to
* rule it out.
+ *
+ * Mask off features that don't work outside long mode (just
+ * PCIDE for now).
*/
- mmu_cr4_features = __read_cr4();
+ mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;

memblock_set_current_limit(get_max_mapped());

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 893fd8c849e2..d05006f6c31c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -227,10 +227,12 @@ static int enable_start_cpu0;
static void notrace start_secondary(void *unused)
{
/*
- * Don't put *anything* before cpu_init(), SMP booting is too
- * fragile that we want to limit the things done here to the
- * most necessary things.
+ * Don't put *anything* except direct CPU state initialization
+ * before cpu_init(), SMP booting is too fragile that we want to
+ * limit the things done here to the most necessary things.
*/
+ if (boot_cpu_has(X86_FEATURE_PCID))
+ __write_cr4(__read_cr4() | X86_CR4_PCIDE);
cpu_init();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bf3f1065d6ad..df2624b091a7 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -19,6 +19,7 @@
#include <asm/microcode.h>
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
+#include <asm/cpufeature.h>

/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
}
}

+static void setup_pcid(void)
+{
+#ifdef CONFIG_X86_64
+ if (boot_cpu_has(X86_FEATURE_PCID)) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
+ /*
+ * This can't be cr4_set_bits_and_update_boot() --
+ * the trampoline code can't handle CR4.PCIDE and
+ * it wouldn't do any good anyway. Despite the name,
+ * cr4_set_bits_and_update_boot() doesn't actually
+ * cause the bits in question to remain set all the
+ * way through the secondary boot asm.
+ *
+ * Instead, we brute-force it and set CR4.PCIDE
+ * manually in start_secondary().
+ */
+ cr4_set_bits(X86_CR4_PCIDE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ }
+ }
+#endif
+}
+
#ifdef CONFIG_X86_32
#define NR_RANGE_MR 3
#else /* CONFIG_X86_64 */
@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
unsigned long end;

probe_page_size_mask();
+ setup_pcid();

#ifdef CONFIG_X86_64
end = max_pfn << PAGE_SHIFT;
--
2.14.2

File diff suppressed because it is too large
@ -1,641 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 11 Jul 2017 10:33:43 -0500
Subject: [PATCH] objtool, x86: Add facility for asm code to provide unwind
hints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Some asm (and inline asm) code does special things to the stack which
objtool can't understand. (Nor can GCC or GNU assembler, for that
matter.) In such cases we need a facility for the code to provide
annotations, so the unwinder can unwind through it.

This provides such a facility, in the form of unwind hints. They're
similar to the GNU assembler .cfi* directives, but they give more
information, and are needed in far fewer places, because objtool can
fill in the blanks by following branches and adjusting the stack pointer
for pushes and pops.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/0f5f3c9104fca559ff4088bece1d14ae3bca52d5.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 39358a033b2e4432052265c1fa0f36f572d8cfb5)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a1fed2e10e84d48643a09861c2d127968621813e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
tools/objtool/Makefile | 3 +
arch/x86/include/asm/orc_types.h | 107 ++++++++++++++++++++
arch/x86/include/asm/unwind_hints.h | 103 +++++++++++++++++++
tools/objtool/check.h | 4 +-
tools/objtool/orc_types.h | 22 +++++
tools/objtool/check.c | 191 +++++++++++++++++++++++++++++++++---
6 files changed, 417 insertions(+), 13 deletions(-)
create mode 100644 arch/x86/include/asm/orc_types.h
create mode 100644 arch/x86/include/asm/unwind_hints.h

diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 0e2765e243c0..3a6425fefc43 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -52,6 +52,9 @@ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
|| echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
+ @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
+ diff ../../arch/x86/include/asm/orc_types.h orc_types.h >/dev/null) \
+ || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true
$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@


diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
new file mode 100644
index 000000000000..7dc777a6cb40
--- /dev/null
+++ b/arch/x86/include/asm/orc_types.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and BP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED 0
+#define ORC_REG_PREV_SP 1
+#define ORC_REG_DX 2
+#define ORC_REG_DI 3
+#define ORC_REG_BP 4
+#define ORC_REG_SP 5
+#define ORC_REG_R10 6
+#define ORC_REG_R13 7
+#define ORC_REG_BP_INDIRECT 8
+#define ORC_REG_SP_INDIRECT 9
+#define ORC_REG_MAX 15
+
+/*
+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
+ * caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+ * points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
+ */
+#define ORC_TYPE_CALL 0
+#define ORC_TYPE_REGS 1
+#define ORC_TYPE_REGS_IRET 2
+#define UNWIND_HINT_TYPE_SAVE 3
+#define UNWIND_HINT_TYPE_RESTORE 4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard. It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the
+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
+ * the stack for a given code address. Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+ s16 sp_offset;
+ s16 bp_offset;
+ unsigned sp_reg:4;
+ unsigned bp_reg:4;
+ unsigned type:2;
+};
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..5e02b11c9b86
--- /dev/null
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -0,0 +1,103 @@
+#ifndef _ASM_X86_UNWIND_HINTS_H
+#define _ASM_X86_UNWIND_HINTS_H
+
+#include "orc_types.h"
+
+#ifdef __ASSEMBLY__
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest. The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack. Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
+ * category. In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer. Such
+ * code needs to be annotated such that objtool can understand it. The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction. Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
+#ifdef CONFIG_STACK_VALIDATION
+.Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+ .long .Lunwind_hint_ip_\@ - .
+ .short \sp_offset
+ .byte \sp_reg
+ .byte \type
+ .popsection
+#endif
+.endm
+
+.macro UNWIND_HINT_EMPTY
+ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
+ .if \base == %rsp && \indirect
+ .set sp_reg, ORC_REG_SP_INDIRECT
+ .elseif \base == %rsp
+ .set sp_reg, ORC_REG_SP
+ .elseif \base == %rbp
+ .set sp_reg, ORC_REG_BP
+ .elseif \base == %rdi
+ .set sp_reg, ORC_REG_DI
+ .elseif \base == %rdx
+ .set sp_reg, ORC_REG_DX
+ .elseif \base == %r10
+ .set sp_reg, ORC_REG_R10
+ .else
+ .error "UNWIND_HINT_REGS: bad base register"
+ .endif
+
+ .set sp_offset, \offset
+
+ .if \iret
+ .set type, ORC_TYPE_REGS_IRET
+ .elseif \extra == 0
+ .set type, ORC_TYPE_REGS_IRET
+ .set sp_offset, \offset + (16*8)
+ .else
+ .set type, ORC_TYPE_REGS
+ .endif
+
+ UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+.endm
+
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
+ UNWIND_HINT_REGS base=\base offset=\offset iret=1
+.endm
+
+.macro UNWIND_HINT_FUNC sp_offset=8
+ UNWIND_HINT sp_offset=\sp_offset
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define UNWIND_HINT(sp_reg, sp_offset, type) \
+ "987: \n\t" \
+ ".pushsection .discard.unwind_hints\n\t" \
+ /* struct unwind_hint */ \
+ ".long 987b - .\n\t" \
+ ".short " __stringify(sp_offset) "\n\t" \
+ ".byte " __stringify(sp_reg) "\n\t" \
+ ".byte " __stringify(type) "\n\t" \
+ ".popsection\n\t"
+
+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
+
+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 046874bbe226..ac3d4b13f17b 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -43,7 +43,7 @@ struct instruction {
unsigned int len;
unsigned char type;
unsigned long immediate;
- bool alt_group, visited, dead_end, ignore;
+ bool alt_group, visited, dead_end, ignore, hint, save, restore;
struct symbol *call_dest;
struct instruction *jump_dest;
struct list_head alts;
@@ -58,7 +58,7 @@ struct objtool_file {
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 16);
struct section *rodata, *whitelist;
- bool ignore_unreachables, c_file;
+ bool ignore_unreachables, c_file, hints;
};

int check(const char *objname, bool nofp, bool orc);
diff --git a/tools/objtool/orc_types.h b/tools/objtool/orc_types.h
index fc5cf6cffd9a..9c9dc579bd7d 100644
--- a/tools/objtool/orc_types.h
+++ b/tools/objtool/orc_types.h
@@ -61,11 +61,19 @@
*
* ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
* points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
*/
#define ORC_TYPE_CALL 0
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
+#define UNWIND_HINT_TYPE_SAVE 3
+#define UNWIND_HINT_TYPE_RESTORE 4

+#ifndef __ASSEMBLY__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
@@ -82,4 +90,18 @@ struct orc_entry {
unsigned type:2;
} __packed;

+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+};
+#endif /* __ASSEMBLY__ */
+
#endif /* _ORC_TYPES_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index cb57c526ba17..368275de5f23 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -100,7 +100,6 @@ static bool gcov_enabled(struct objtool_file *file)
static bool ignore_func(struct objtool_file *file, struct symbol *func)
{
struct rela *rela;
- struct instruction *insn;

/* check for STACK_FRAME_NON_STANDARD */
if (file->whitelist && file->whitelist->rela)
@@ -113,11 +112,6 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func)
return true;
}

- /* check if it has a context switching instruction */
- func_for_each_insn(file, func, insn)
- if (insn->type == INSN_CONTEXT_SWITCH)
- return true;
-
return false;
}

@@ -879,6 +873,99 @@ static int add_switch_table_alts(struct objtool_file *file)
return 0;
}

+static int read_unwind_hints(struct objtool_file *file)
+{
+ struct section *sec, *relasec;
+ struct rela *rela;
+ struct unwind_hint *hint;
+ struct instruction *insn;
+ struct cfi_reg *cfa;
+ int i;
+
+ sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+ if (!sec)
+ return 0;
+
+ relasec = sec->rela;
+ if (!relasec) {
+ WARN("missing .rela.discard.unwind_hints section");
+ return -1;
+ }
+
+ if (sec->len % sizeof(struct unwind_hint)) {
+ WARN("struct unwind_hint size mismatch");
+ return -1;
+ }
+
+ file->hints = true;
+
+ for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
+ hint = (struct unwind_hint *)sec->data->d_buf + i;
+
+ rela = find_rela_by_dest(sec, i * sizeof(*hint));
+ if (!rela) {
+ WARN("can't find rela for unwind_hints[%d]", i);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("can't find insn for unwind_hints[%d]", i);
+ return -1;
+ }
+
+ cfa = &insn->state.cfa;
+
+ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
+ insn->save = true;
+ continue;
+
+ } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
+ insn->restore = true;
+ insn->hint = true;
+ continue;
+ }
+
+ insn->hint = true;
+
+ switch (hint->sp_reg) {
+ case ORC_REG_UNDEFINED:
+ cfa->base = CFI_UNDEFINED;
+ break;
+ case ORC_REG_SP:
+ cfa->base = CFI_SP;
+ break;
+ case ORC_REG_BP:
+ cfa->base = CFI_BP;
+ break;
+ case ORC_REG_SP_INDIRECT:
+ cfa->base = CFI_SP_INDIRECT;
+ break;
+ case ORC_REG_R10:
+ cfa->base = CFI_R10;
+ break;
+ case ORC_REG_R13:
+ cfa->base = CFI_R13;
+ break;
+ case ORC_REG_DI:
+ cfa->base = CFI_DI;
+ break;
+ case ORC_REG_DX:
+ cfa->base = CFI_DX;
+ break;
+ default:
+ WARN_FUNC("unsupported unwind_hint sp base reg %d",
+ insn->sec, insn->offset, hint->sp_reg);
+ return -1;
+ }
+
+ cfa->offset = hint->sp_offset;
+ insn->state.type = hint->type;
+ }
+
+ return 0;
+}
+
static int decode_sections(struct objtool_file *file)
{
int ret;
@@ -909,6 +996,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;

+ ret = read_unwind_hints(file);
+ if (ret)
+ return ret;
+
return 0;
}

@@ -1382,7 +1473,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
struct insn_state state)
{
struct alternative *alt;
- struct instruction *insn;
+ struct instruction *insn, *next_insn;
struct section *sec;
struct symbol *func = NULL;
int ret;
@@ -1397,6 +1488,8 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
|
||||
}
|
||||
|
||||
while (1) {
|
||||
+ next_insn = next_insn_same_sec(file, insn);
|
||||
+
|
||||
if (file->c_file && insn->func) {
|
||||
if (func && func != insn->func) {
|
||||
WARN("%s() falls through to next function %s()",
|
||||
@@ -1414,13 +1507,54 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
|
||||
}
|
||||
|
||||
if (insn->visited) {
|
||||
- if (!!insn_state_match(insn, &state))
|
||||
+ if (!insn->hint && !insn_state_match(insn, &state))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
- insn->state = state;
|
||||
+ if (insn->hint) {
|
||||
+ if (insn->restore) {
|
||||
+ struct instruction *save_insn, *i;
|
||||
+
|
||||
+ i = insn;
|
||||
+ save_insn = NULL;
|
||||
+ func_for_each_insn_continue_reverse(file, func, i) {
|
||||
+ if (i->save) {
|
||||
+ save_insn = i;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!save_insn) {
|
||||
+ WARN_FUNC("no corresponding CFI save for CFI restore",
|
||||
+ sec, insn->offset);
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ if (!save_insn->visited) {
|
||||
+ /*
|
||||
+ * Oops, no state to copy yet.
|
||||
+ * Hopefully we can reach this
|
||||
+ * instruction from another branch
|
||||
+ * after the save insn has been
|
||||
+ * visited.
|
||||
+ */
|
||||
+ if (insn == first)
|
||||
+ return 0;
|
||||
+
|
||||
+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
|
||||
+ sec, insn->offset);
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ insn->state = save_insn->state;
|
||||
+ }
|
||||
+
|
||||
+ state = insn->state;
|
||||
+
|
||||
+ } else
|
||||
+ insn->state = state;
|
||||
|
||||
insn->visited = true;
|
||||
|
||||
@@ -1497,6 +1631,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
|
||||
|
||||
return 0;
|
||||
|
||||
+ case INSN_CONTEXT_SWITCH:
|
||||
+ if (func && (!next_insn || !next_insn->hint)) {
|
||||
+ WARN_FUNC("unsupported instruction in callable function",
|
||||
+ sec, insn->offset);
|
||||
+ return 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+
|
||||
case INSN_STACK:
|
||||
if (update_insn_state(insn, &state))
|
||||
return -1;
|
||||
@@ -1510,7 +1652,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
|
||||
if (insn->dead_end)
|
||||
return 0;
|
||||
|
||||
- insn = next_insn_same_sec(file, insn);
|
||||
+ insn = next_insn;
|
||||
if (!insn) {
|
||||
WARN("%s: unexpected end of section", sec->name);
|
||||
return 1;
|
||||
@@ -1520,6 +1662,27 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int validate_unwind_hints(struct objtool_file *file)
|
||||
+{
|
||||
+ struct instruction *insn;
|
||||
+ int ret, warnings = 0;
|
||||
+ struct insn_state state;
|
||||
+
|
||||
+ if (!file->hints)
|
||||
+ return 0;
|
||||
+
|
||||
+ clear_insn_state(&state);
|
||||
+
|
||||
+ for_each_insn(file, insn) {
|
||||
+ if (insn->hint && !insn->visited) {
|
||||
+ ret = validate_branch(file, insn, state);
|
||||
+ warnings += ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return warnings;
|
||||
+}
|
||||
+
|
||||
static bool is_kasan_insn(struct instruction *insn)
|
||||
{
|
||||
return (insn->type == INSN_CALL &&
|
||||
@@ -1665,8 +1828,9 @@ int check(const char *_objname, bool _nofp, bool orc)
|
||||
hash_init(file.insn_hash);
|
||||
file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
|
||||
file.rodata = find_section_by_name(file.elf, ".rodata");
|
||||
- file.ignore_unreachables = false;
|
||||
file.c_file = find_section_by_name(file.elf, ".comment");
|
||||
+ file.ignore_unreachables = false;
|
||||
+ file.hints = false;
|
||||
|
||||
arch_initial_func_cfi_state(&initial_func_cfi);
|
||||
|
||||
@@ -1683,6 +1847,11 @@ int check(const char *_objname, bool _nofp, bool orc)
|
||||
goto out;
|
||||
warnings += ret;
|
||||
|
||||
+ ret = validate_unwind_hints(&file);
|
||||
+ if (ret < 0)
|
||||
+ goto out;
|
||||
+ warnings += ret;
|
||||
+
|
||||
if (!warnings) {
|
||||
ret = validate_reachable_instructions(&file);
|
||||
if (ret < 0)
|
||||
--
|
||||
2.14.2
|
||||
|
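The SAVE/RESTORE hint types introduced by the patch above are consumed in pairs: a save hint makes objtool snapshot the CFI state it has computed at that instruction, and a restore hint re-applies that snapshot after a stretch of code whose stack manipulation objtool cannot follow (see the insn->save/insn->restore handling in validate_branch() above). A minimal sketch of the intended annotation pattern in assembly -- illustrative only, not taken from any patch in this series:

	UNWIND_HINT_SAVE		/* objtool records its current CFI state */
	/*
	 * Code that temporarily violates the tracked stack layout,
	 * e.g. switching to a different stack, goes here.
	 */
	UNWIND_HINT_RESTORE		/* objtool resumes from the recorded state */
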
@ -1,171 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 25 Jul 2017 08:54:24 -0500
Subject: [PATCH] x86/kconfig: Consolidate unwinders into multiple choice
 selection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

There are three mutually exclusive unwinders. Make that more obvious by
combining them into a multiple-choice selection:

CONFIG_FRAME_POINTER_UNWINDER
CONFIG_ORC_UNWINDER
CONFIG_GUESS_UNWINDER (if CONFIG_EXPERT=y)

Frame pointers are still the default (for now).

The old CONFIG_FRAME_POINTER option is still used in some
arch-independent places, so keep it around, but make it
invisible to the user on x86 - it's now selected by
CONFIG_FRAME_POINTER_UNWINDER=y.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/20170725135424.zukjmgpz3plf5pmt@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 81d387190039c14edac8de2b3ec789beb899afd9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 26ddacc1e6333555e4a6bd63c4c935b323509f92)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/unwind.h | 4 ++--
arch/x86/Kconfig | 3 +--
arch/x86/Kconfig.debug | 45 +++++++++++++++++++++++++++++++++++++------
arch/x86/configs/tiny.config | 2 ++
4 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 25b8d31a007d..e9f793e2df7a 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -16,7 +16,7 @@ struct unwind_state {
 bool signal, full_regs;
 unsigned long sp, bp, ip;
 struct pt_regs *regs;
-#elif defined(CONFIG_FRAME_POINTER)
+#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
 bool got_irq;
 unsigned long *bp, *orig_sp, ip;
 struct pt_regs *regs;
@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 __unwind_start(state, task, regs, first_frame);
 }

-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER)
+#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 {
 if (unwind_done(state))
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d6f45f6d1054..3a0b8cb57caf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -73,7 +73,6 @@ config X86
 select ARCH_USE_QUEUED_RWLOCKS
 select ARCH_USE_QUEUED_SPINLOCKS
 select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
- select ARCH_WANT_FRAME_POINTERS
 select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 select ARCH_WANTS_THP_SWAP if X86_64
 select BUILDTIME_EXTABLE_SORT
@@ -169,7 +168,7 @@ config X86
 select HAVE_PERF_REGS
 select HAVE_PERF_USER_STACK_DUMP
 select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER && STACK_VALIDATION
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
 select HAVE_STACK_VALIDATION if X86_64
 select HAVE_SYSCALL_TRACEPOINTS
 select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d5bca2ec8a74..c441b5d65ec8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -356,6 +356,29 @@ config PUNIT_ATOM_DEBUG
 The current power state can be read from
 /sys/kernel/debug/punit_atom/dev_power_state

+choice
+ prompt "Choose kernel unwinder"
+ default FRAME_POINTER_UNWINDER
+ ---help---
+ This determines which method will be used for unwinding kernel stack
+ traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
+ livepatch, lockdep, and more.
+
+config FRAME_POINTER_UNWINDER
+ bool "Frame pointer unwinder"
+ select FRAME_POINTER
+ ---help---
+ This option enables the frame pointer unwinder for unwinding kernel
+ stack traces.
+
+ The unwinder itself is fast and it uses less RAM than the ORC
+ unwinder, but the kernel text size will grow by ~3% and the kernel's
+ overall performance will degrade by roughly 5-10%.
+
+ This option is recommended if you want to use the livepatch
+ consistency model, as this is currently the only way to get a
+ reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
 config ORC_UNWINDER
 bool "ORC unwinder"
 depends on X86_64
@@ -373,12 +396,22 @@ config ORC_UNWINDER
 Enabling this option will increase the kernel's runtime memory usage
 by roughly 2-4MB, depending on your kernel config.

-config FRAME_POINTER_UNWINDER
- def_bool y
- depends on !ORC_UNWINDER && FRAME_POINTER
-
 config GUESS_UNWINDER
- def_bool y
- depends on !ORC_UNWINDER && !FRAME_POINTER
+ bool "Guess unwinder"
+ depends on EXPERT
+ ---help---
+ This option enables the "guess" unwinder for unwinding kernel stack
+ traces. It scans the stack and reports every kernel text address it
+ finds. Some of the addresses it reports may be incorrect.
+
+ While this option often produces false positives, it can still be
+ useful in many cases. Unlike the other unwinders, it has no runtime
+ overhead.
+
+endchoice
+
+config FRAME_POINTER
+ depends on !ORC_UNWINDER && !GUESS_UNWINDER
+ bool

 endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 4b429df40d7a..550cd5012b73 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,3 +1,5 @@
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
 # CONFIG_HIGHMEM64G is not set
+CONFIG_GUESS_UNWINDER=y
+# CONFIG_FRAME_POINTER_UNWINDER is not set
--
2.14.2

@ -1,51 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 3 Oct 2017 20:10:36 -0500
Subject: [PATCH] objtool: Upgrade libelf-devel warning to error for
 CONFIG_ORC_UNWINDER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

With CONFIG_ORC_UNWINDER, if the user doesn't have libelf-devel
installed, and they don't see the make warning, their ORC unwinder will
be silently broken. Upgrade the warning to an error.

Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/d9dfc39fb8240998820f9efb233d283a1ee96084.1507079417.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 3dd40cb320fee7c23b574ab821ce140ccd1281c9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit c413466a72ca533ec126ebc0c5bb579ae0c96b1d)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
Makefile | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8e14a926fc94..490ce18685ea 100644
--- a/Makefile
+++ b/Makefile
@@ -965,7 +965,11 @@ ifdef CONFIG_STACK_VALIDATION
 ifeq ($(has_libelf),1)
 objtool_target := tools/objtool FORCE
 else
- $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ ifdef CONFIG_ORC_UNWINDER
+ $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ else
+ $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ endif
 SKIP_STACK_VALIDATION := 1
 export SKIP_STACK_VALIDATION
 endif
--
2.14.2

@ -1,82 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 26 Jul 2017 07:16:30 -0700
Subject: [PATCH] x86/ldt/64: Refresh DS and ES when modify_ldt changes an
 entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

On x86_32, modify_ldt() implicitly refreshes the cached DS and ES
segments because they are refreshed on return to usermode.

On x86_64, they're not refreshed on return to usermode. To improve
determinism and match x86_32's behavior, refresh them when we update
the LDT.

This avoids a situation in which the DS points to a descriptor that is
changed but the old cached segment persists until the next reschedule.
If this happens, then the user-visible state will change
nondeterministically some time after modify_ldt() returns, which is
unfortunate.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Chang Seok <chang.seok.bae@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit a632375764aa25c97b78beb56c71b0ba59d1cf83)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 295cb0b06150958ec84ee4b8844ef7e389e22c4e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/ldt.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index a870910c8565..f0e64db18ac8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -21,6 +21,25 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>

+static void refresh_ldt_segments(void)
+{
+#ifdef CONFIG_X86_64
+ unsigned short sel;
+
+ /*
+ * Make sure that the cached DS and ES descriptors match the updated
+ * LDT.
+ */
+ savesegment(ds, sel);
+ if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
+ loadsegment(ds, sel);
+
+ savesegment(es, sel);
+ if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
+ loadsegment(es, sel);
+#endif
+}
+
 /* context.lock is held for us, so we don't need any locking. */
 static void flush_ldt(void *__mm)
 {
@@ -32,6 +51,8 @@ static void flush_ldt(void *__mm)

 pc = &mm->context;
 set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+
+ refresh_ldt_segments();
 }

 /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
--
2.14.2

@ -1,182 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:15 -0700
Subject: [PATCH] x86/mm: Give each mm TLB flush generation a unique ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This adds two new variables to mmu_context_t: ctx_id and tlb_gen.
ctx_id uniquely identifies the mm_struct and will never be reused.
For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic
count of the number of times that a TLB flush has been requested.
The pair (ctx_id, tlb_gen) can be used as an identifier for TLB
flush actions and will be used in subsequent patches to reliably
determine whether all needed TLB flushes have occurred on a given
CPU.

This patch is split out for ease of review. By itself, it has no
real effect other than creating and updating the new variables.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit f39681ed0f48498b80455095376f11535feea332)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit e566a0dfbb2a5f7ea90dd66ce384740372739e14)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu.h | 25 +++++++++++++++++++++++--
arch/x86/include/asm/mmu_context.h | 6 ++++++
arch/x86/include/asm/tlbflush.h | 18 ++++++++++++++++++
arch/x86/mm/tlb.c | 6 ++++--
4 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 79b647a7ebd0..bb8c597c2248 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,12 +3,28 @@

 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/atomic.h>

 /*
- * The x86 doesn't have a mmu context, but
- * we put the segment information here.
+ * x86 has arch-specific MMU state beyond what lives in mm_struct.
 */
 typedef struct {
+ /*
+ * ctx_id uniquely identifies this mm_struct. A ctx_id will never
+ * be reused, and zero is not a valid ctx_id.
+ */
+ u64 ctx_id;
+
+ /*
+ * Any code that needs to do any sort of TLB flushing for this
+ * mm will first make its changes to the page tables, then
+ * increment tlb_gen, then flush. This lets the low-level
+ * flushing code keep track of what needs flushing.
+ *
+ * This is not used on Xen PV.
+ */
+ atomic64_t tlb_gen;
+
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 struct ldt_struct *ldt;
 #endif
@@ -37,6 +53,11 @@ typedef struct {
 #endif
 } mm_context_t;

+#define INIT_MM_CONTEXT(mm) \
+ .context = { \
+ .ctx_id = 1, \
+ }
+
 void leave_mm(int cpu);

 #endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7a234be7e298..6c05679c715b 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,6 +12,9 @@
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
 #include <asm/mpx.h>
+
+extern atomic64_t last_mm_ctx_id;
+
 #ifndef CONFIG_PARAVIRT
 static inline void paravirt_activate_mm(struct mm_struct *prev,
 struct mm_struct *next)
@@ -132,6 +135,9 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 static inline int init_new_context(struct task_struct *tsk,
 struct mm_struct *mm)
 {
+ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+ atomic64_set(&mm->context.tlb_gen, 0);
+
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
 /* pkey 0 is the default and always allocated */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 2b3d68093235..f1f2e73b7b77 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -57,6 +57,23 @@ static inline void invpcid_flush_all_nonglobals(void)
 __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
 }

+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+ u64 new_tlb_gen;
+
+ /*
+ * Bump the generation count. This also serves as a full barrier
+ * that synchronizes with switch_mm(): callers are required to order
+ * their read of mm_cpumask after their writes to the paging
+ * structures.
+ */
+ smp_mb__before_atomic();
+ new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
+ smp_mb__after_atomic();
+
+ return new_tlb_gen;
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -270,6 +287,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
 struct mm_struct *mm)
 {
+ inc_mm_tlb_gen(mm);
 cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
 }

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 014d07a80053..14f4f8f66aa8 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,8 @@
 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
 */

+atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+
 void leave_mm(int cpu)
 {
 struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -250,8 +252,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,

 cpu = get_cpu();

- /* Synchronize with switch_mm. */
- smp_mb();
+ /* This is also a barrier that synchronizes with switch_mm(). */
+ inc_mm_tlb_gen(mm);

 /* Should we flush just the requested range? */
 if ((end != TLB_FLUSH_ALL) &&
--
2.14.2

@ -1,279 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:16 -0700
Subject: [PATCH] x86/mm: Track the TLB's tlb_gen and update the flushing
 algorithm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

There are two kernel features that would benefit from tracking
how up-to-date each CPU's TLB is in the case where IPIs aren't keeping
it up to date in real time:

- Lazy mm switching currently works by switching to init_mm when
it would otherwise flush. This is wasteful: there isn't fundamentally
any need to update CR3 at all when going lazy or when returning from
lazy mode, nor is there any need to receive flush IPIs at all. Instead,
we should just stop trying to keep the TLB coherent when we go lazy and,
when unlazying, check whether we missed any flushes.

- PCID will let us keep recent user contexts alive in the TLB. If we
start doing this, we need a way to decide whether those contexts are
up to date.

On some paravirt systems, remote TLBs can be flushed without IPIs.
This won't update the target CPUs' tlb_gens, which may cause
unnecessary local flushes later on. We can address this if it becomes
a problem by carefully updating the target CPU's tlb_gen directly.

By itself, this patch is a very minor optimization that avoids
unnecessary flushes when multiple TLB flushes targeting the same CPU
race. The complexity in this patch would not be worth it on its own,
but it will enable improved lazy TLB tracking and PCID.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1210fb244bc9cbe7677f7f0b72db4d359675f24b.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit b0579ade7cd82391360e959cc844e50a160e8a96)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d34881c25f3c70228ed792fd62881185a25c4422)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/tlbflush.h | 43 +++++++++++++++--
arch/x86/mm/tlb.c | 102 +++++++++++++++++++++++++++++++++++++---
2 files changed, 135 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index f1f2e73b7b77..3a167c214560 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -82,6 +82,11 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif

+struct tlb_context {
+ u64 ctx_id;
+ u64 tlb_gen;
+};
+
 struct tlb_state {
 /*
 * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
@@ -97,6 +102,21 @@ struct tlb_state {
 * disabling interrupts when modifying either one.
 */
 unsigned long cr4;
+
+ /*
+ * This is a list of all contexts that might exist in the TLB.
+ * Since we don't yet use PCID, there is only one context.
+ *
+ * For each context, ctx_id indicates which mm the TLB's user
+ * entries came from. As an invariant, the TLB will never
+ * contain entries that are out-of-date as when that mm reached
+ * the tlb_gen in the list.
+ *
+ * To be clear, this means that it's legal for the TLB code to
+ * flush the TLB without updating tlb_gen. This can happen
+ * (for now, at least) due to paravirt remote flushes.
+ */
+ struct tlb_context ctxs[1];
 };
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

@@ -256,9 +276,26 @@ static inline void __flush_tlb_one(unsigned long addr)
 * and page-granular flushes are available only on i486 and up.
 */
 struct flush_tlb_info {
- struct mm_struct *mm;
- unsigned long start;
- unsigned long end;
+ /*
+ * We support several kinds of flushes.
+ *
+ * - Fully flush a single mm. .mm will be set, .end will be
+ * TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
+ * which the IPI sender is trying to catch us up.
+ *
+ * - Partially flush a single mm. .mm will be set, .start and
+ * .end will indicate the range, and .new_tlb_gen will be set
+ * such that the changes between generation .new_tlb_gen-1 and
+ * .new_tlb_gen are entirely contained in the indicated range.
+ *
+ * - Fully flush all mms whose tlb_gens have been updated. .mm
+ * will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
+ * will be zero.
+ */
+ struct mm_struct *mm;
+ unsigned long start;
+ unsigned long end;
+ u64 new_tlb_gen;
 };

 #define local_flush_tlb() __flush_tlb()
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 14f4f8f66aa8..4e5a5ddb9e4d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -105,6 +105,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 }

 this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));

 WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
 cpumask_set_cpu(cpu, mm_cpumask(next));
@@ -155,25 +157,102 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 switch_ldt(real_prev, next);
 }

+/*
+ * flush_tlb_func_common()'s memory ordering requirement is that any
+ * TLB fills that happen after we flush the TLB are ordered after we
+ * read active_mm's tlb_gen. We don't need any explicit barriers
+ * because all x86 flush operations are serializing and the
+ * atomic64_read operation won't be reordered by the compiler.
+ */
 static void flush_tlb_func_common(const struct flush_tlb_info *f,
 bool local, enum tlb_flush_reason reason)
 {
+ /*
+ * We have three different tlb_gen values in here. They are:
+ *
+ * - mm_tlb_gen: the latest generation.
+ * - local_tlb_gen: the generation that this CPU has already caught
+ * up to.
+ * - f->new_tlb_gen: the generation that the requester of the flush
+ * wants us to catch up to.
+ */
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+ u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
+
 /* This code cannot presently handle being reentered. */
 VM_WARN_ON(!irqs_disabled());

+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
+ loaded_mm->context.ctx_id);
+
 if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+ /*
+ * leave_mm() is adequate to handle any type of flush, and
+ * we would prefer not to receive further IPIs. leave_mm()
+ * clears this CPU's bit in mm_cpumask().
+ */
 leave_mm(smp_processor_id());
 return;
 }

- if (f->end == TLB_FLUSH_ALL) {
- local_flush_tlb();
- if (local)
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- trace_tlb_flush(reason, TLB_FLUSH_ALL);
- } else {
+ if (unlikely(local_tlb_gen == mm_tlb_gen)) {
+ /*
+ * There's nothing to do: we're already up to date. This can
+ * happen if two concurrent flushes happen -- the first flush to
+ * be handled can catch us all the way up, leaving no work for
+ * the second flush.
+ */
+ return;
+ }
+
+ WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
+ WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
+
+ /*
+ * If we get to this point, we know that our TLB is out of date.
+ * This does not strictly imply that we need to flush (it's
+ * possible that f->new_tlb_gen <= local_tlb_gen), but we're
+ * going to need to flush in the very near future, so we might
+ * as well get it over with.
+ *
+ * The only question is whether to do a full or partial flush.
+ *
+ * We do a partial flush if requested and two extra conditions
+ * are met:
+ *
+ * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
+ * we've always done all needed flushes to catch up to
+ * local_tlb_gen. If, for example, local_tlb_gen == 2 and
+ * f->new_tlb_gen == 3, then we know that the flush needed to bring
+ * us up to date for tlb_gen 3 is the partial flush we're
+ * processing.
+ *
+ * As an example of why this check is needed, suppose that there
+ * are two concurrent flushes. The first is a full flush that
+ * changes context.tlb_gen from 1 to 2. The second is a partial
+ * flush that changes context.tlb_gen from 2 to 3. If they get
+ * processed on this CPU in reverse order, we'll see
+ * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
+ * If we were to use __flush_tlb_single() and set local_tlb_gen to
+ * 3, we'd be break the invariant: we'd update local_tlb_gen above
+ * 1 without the full flush that's needed for tlb_gen 2.
+ *
+ * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimiation.
+ * Partial TLB flushes are not all that much cheaper than full TLB
+ * flushes, so it seems unlikely that it would be a performance win
+ * to do a partial flush if that won't bring our TLB fully up to
+ * date. By doing a full flush instead, we can increase
+ * local_tlb_gen all the way to mm_tlb_gen and we can probably
+ * avoid another flush in the very near future.
+ */
+ if (f->end != TLB_FLUSH_ALL &&
+ f->new_tlb_gen == local_tlb_gen + 1 &&
+ f->new_tlb_gen == mm_tlb_gen) {
+ /* Partial flush */
 unsigned long addr;
 unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+
 addr = f->start;
 while (addr < f->end) {
 __flush_tlb_single(addr);
@@ -182,7 +261,16 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 if (local)
 count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
 trace_tlb_flush(reason, nr_pages);
+ } else {
+ /* Full flush. */
+ local_flush_tlb();
+ if (local)
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ trace_tlb_flush(reason, TLB_FLUSH_ALL);
 }
+
+ /* Both paths above update our state to mm_tlb_gen. */
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
 }

 static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
@@ -253,7 +341,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 cpu = get_cpu();

 /* This is also a barrier that synchronizes with switch_mm(). */
- inc_mm_tlb_gen(mm);
+ info.new_tlb_gen = inc_mm_tlb_gen(mm);

 /* Should we flush just the requested range? */
 if ((end != TLB_FLUSH_ALL) &&
--
2.14.2

@ -1,453 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:17 -0700
Subject: [PATCH] x86/mm: Rework lazy TLB mode and TLB freshness tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

x86's lazy TLB mode used to be fairly weak -- it would switch to
init_mm the first time it tried to flush a lazy TLB. This meant an
unnecessary CR3 write and, if the flush was remote, an unnecessary
IPI.

Rewrite it entirely. When we enter lazy mode, we simply remove the
CPU from mm_cpumask. This means that we need a way to figure out
whether we've missed a flush when we switch back out of lazy mode.
I use the tlb_gen machinery to track whether a context is up to
date.

Note to reviewers: this patch, by itself, looks a bit odd. I'm
using an array of length 1 containing (ctx_id, tlb_gen) rather than
just storing tlb_gen, and making it an array isn't necessary yet.
I'm doing this because the next few patches add PCID support, and,
with PCID, we need ctx_id, and the array will end up with a length
greater than 1. Making it an array now means that there will be
less churn and therefore less stress on your eyeballs.

NB: This is dubious but, AFAICT, still correct on Xen and UV.
xen_exit_mmap() uses mm_cpumask() for nefarious purposes and this
patch changes the way that mm_cpumask() works. This should be okay,
since Xen *also* iterates all online CPUs to find all the CPUs it
needs to twiddle.

The UV tlbflush code is rather dated and should be changed.

Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
(turbo off, intel_pstate requesting max performance) under KVM with
the guest using idle=poll (to avoid artifacts when bouncing between
CPUs). I haven't done any real statistics here -- I just ran them
in a loop and picked the fastest results that didn't look like
outliers. Unpatched means commit a4eb8b993554, so all the
bookkeeping overhead is gone.

MADV_DONTNEED; touch the page; switch CPUs using sched_setaffinity. In
an unpatched kernel, MADV_DONTNEED will send an IPI to the previous CPU.
This is intended to be a nearly worst-case test.

patched: 13.4µs
unpatched: 21.6µs

Vitaly's pthread_mmap microbenchmark with 8 threads (on four cores),
nrounds = 100, 256M data

patched: 1.1 seconds or so
unpatched: 1.9 seconds or so

The speedup on Vitaly's test appears to be because it spends a lot
of time blocked on mmap_sem, and this patch avoids sending IPIs to
blocked CPUs.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Banman <abanman@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Travis <travis@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/ddf2c92962339f4ba39d8fc41b853936ec0b44f1.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 94b1b03b519b81c494900cb112aa00ed205cc2d9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b381b7ae452f2bc6384507a897247be7c93a71cc)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 6 +-
arch/x86/include/asm/tlbflush.h | 4 -
arch/x86/mm/init.c | 1 -
arch/x86/mm/tlb.c | 197 ++++++++++++++++++++++---------------
arch/x86/xen/mmu_pv.c | 5 +-
5 files changed, 124 insertions(+), 89 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6c05679c715b..d6b055b328f2 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -128,8 +128,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)

 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+ int cpu = smp_processor_id();
+
+ if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+ cpumask_clear_cpu(cpu, mm_cpumask(mm));
 }

 static inline int init_new_context(struct task_struct *tsk,
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 3a167c214560..6397275008db 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -95,7 +95,6 @@ struct tlb_state {
 * mode even if we've already switched back to swapper_pg_dir.
 */
 struct mm_struct *loaded_mm;
- int state;

 /*
 * Access to this CR4 shadow and to H/W CR4 is protected by
@@ -318,9 +317,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 void native_flush_tlb_others(const struct cpumask *cpumask,
 const struct flush_tlb_info *info);

-#define TLBSTATE_OK 1
-#define TLBSTATE_LAZY 2
-
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
 struct mm_struct *mm)
 {
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index df2624b091a7..c86dc071bb10 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -849,7 +849,6 @@ void __init zone_sizes_init(void)

 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
 .loaded_mm = &init_mm,
- .state = 0,
 .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
 };
 EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4e5a5ddb9e4d..0982c997d36f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -45,8 +45,8 @@ void leave_mm(int cpu)
 if (loaded_mm == &init_mm)
 return;

- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
- BUG();
+ /* Warn if we're not lazy. */
+ WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));

 switch_mm(NULL, &init_mm, NULL);
 }
@@ -65,94 +65,117 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 struct task_struct *tsk)
 {
- unsigned cpu = smp_processor_id();
 struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+ unsigned cpu = smp_processor_id();
+ u64 next_tlb_gen;

 /*
- * NB: The scheduler will call us with prev == next when
- * switching from lazy TLB mode to normal mode if active_mm
- * isn't changing. When this happens, there is no guarantee
- * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
+ * NB: The scheduler will call us with prev == next when switching
+ * from lazy TLB mode to normal mode if active_mm isn't changing.
+ * When this happens, we don't assume that CR3 (and hence
+ * cpu_tlbstate.loaded_mm) matches next.
 *
 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
 */

- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ /* We don't want flush_tlb_func_* to run concurrently with us. */
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING))
+ WARN_ON_ONCE(!irqs_disabled());
+
+ /*
+ * Verify that CR3 is what we think it is. This will catch
+ * hypothetical buggy code that directly switches to swapper_pg_dir
+ * without going through leave_mm() / switch_mm_irqs_off().
+ */
+ VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));

 if (real_prev == next) {
- /*
- * There's nothing to do: we always keep the per-mm control
- * regs in sync with cpu_tlbstate.loaded_mm. Just
- * sanity-check mm_cpumask.
- */
- if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
- cpumask_set_cpu(cpu, mm_cpumask(next));
- return;
- }
+ VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
+ next->context.ctx_id);
+
+ if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ /*
+ * There's nothing to do: we weren't lazy, and we
+ * aren't changing our mm. We don't need to flush
+ * anything, nor do we need to update CR3, CR4, or
+ * LDTR.
+ */
+ return;
+ }
+
+ /* Resume remote flushes and then read tlb_gen. */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+ if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
+ /*
+ * Ideally, we'd have a flush_tlb() variant that
+ * takes the known CR3 value as input. This would
+ * be faster on Xen PV and on hypothetical CPUs
+ * on which INVPCID is fast.
+ */
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
+ next_tlb_gen);
+ write_cr3(__pa(next->pgd));
+
+ /*
+ * This gets called via leave_mm() in the idle path
+ * where RCU functions differently. Tracing normally
+ * uses RCU, so we have to call the tracepoint
+ * specially here.
+ */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_FLUSH_ALL);
+ }

- if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 /*
- * If our current stack is in vmalloc space and isn't
- * mapped in the new pgd, we'll double-fault. Forcibly
- * map it.
+ * We just exited lazy mode, which means that CR4 and/or LDTR
+ * may be stale. (Changes to the required CR4 and LDTR states
+ * are not reflected in tlb_gen.)
 */
- unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
-
- pgd_t *pgd = next->pgd + stack_pgd_index;
-
- if (unlikely(pgd_none(*pgd)))
- set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
- }
+ } else {
+ VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
+ next->context.ctx_id);
+
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ /*
+ * If our current stack is in vmalloc space and isn't
+ * mapped in the new pgd, we'll double-fault. Forcibly
+ * map it.
+ */
+ unsigned int index = pgd_index(current_stack_pointer());
+ pgd_t *pgd = next->pgd + index;
+
+ if (unlikely(pgd_none(*pgd)))
+ set_pgd(pgd, init_mm.pgd[index]);
+ }

- this_cpu_write(cpu_tlbstate.loaded_mm, next);
- this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));
+ /* Stop remote flushes for the previous mm */
+ if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
+ cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

- WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
- cpumask_set_cpu(cpu, mm_cpumask(next));
+ VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));

- /*
- * Re-load page tables.
- *
- * This logic has an ordering constraint:
- *
- * CPU 0: Write to a PTE for 'next'
- * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
- * CPU 1: set bit 1 in next's mm_cpumask
- * CPU 1: load from the PTE that CPU 0 writes (implicit)
- *
- * We need to prevent an outcome in which CPU 1 observes
- * the new PTE value and CPU 0 observes bit 1 clear in
- * mm_cpumask. (If that occurs, then the IPI will never
- * be sent, and CPU 0's TLB will contain a stale entry.)
- *
- * The bad outcome can occur if either CPU's load is
- * reordered before that CPU's store, so both CPUs must
- * execute full barriers to prevent this from happening.
- *
- * Thus, switch_mm needs a full barrier between the
- * store to mm_cpumask and any operation that could load
- * from next->pgd. TLB fills are special and can happen
- * due to instruction fetches or for no reason at all,
- * and neither LOCK nor MFENCE orders them.
- * Fortunately, load_cr3() is serializing and gives the
- * ordering guarantee we need.
- */
- load_cr3(next->pgd);
+ /*
+ * Start remote flushes and then read tlb_gen.
+ */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);

- /*
- * This gets called via leave_mm() in the idle path where RCU
- * functions differently. Tracing normally uses RCU, so we have to
- * call the tracepoint specially here.
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
+ this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ write_cr3(__pa(next->pgd));

- /* Stop flush ipis for the previous mm */
- WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
- real_prev != &init_mm);
- cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+ /*
+ * This gets called via leave_mm() in the idle path where RCU
+ * functions differently. Tracing normally uses RCU, so we
+ * have to call the tracepoint specially here.
+ */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_FLUSH_ALL);
+ }

- /* Load per-mm CR4 and LDTR state */
 load_mm_cr4(next);
 switch_ldt(real_prev, next);
 }
@@ -186,13 +209,13 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
 loaded_mm->context.ctx_id);

- if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
 /*
- * leave_mm() is adequate to handle any type of flush, and
- * we would prefer not to receive further IPIs. leave_mm()
- * clears this CPU's bit in mm_cpumask().
+ * We're in lazy mode -- don't flush. We can get here on
+ * remote flushes due to races and on local flushes if a
+ * kernel thread coincidentally flushes the mm it's lazily
+ * still using.
 */
- leave_mm(smp_processor_id());
 return;
 }

@@ -203,6 +226,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 * be handled can catch us all the way up, leaving no work for
 * the second flush.
 */
+ trace_tlb_flush(reason, 0);
 return;
 }

@@ -304,6 +328,21 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 (info->end - info->start) >> PAGE_SHIFT);

 if (is_uv_system()) {
+ /*
+ * This whole special case is confused. UV has a "Broadcast
+ * Assist Unit", which seems to be a fancy way to send IPIs.
+ * Back when x86 used an explicit TLB flush IPI, UV was
+ * optimized to use its own mechanism. These days, x86 uses
+ * smp_call_function_many(), but UV still uses a manual IPI,
+ * and that IPI's action is out of date -- it does a manual
+ * flush instead of calling flush_tlb_func_remote(). This
+ * means that the percpu tlb_gen variables won't be updated
+ * and we'll do pointless flushes on future context switches.
+ *
+ * Rather than hooking native_flush_tlb_others() here, I think
+ * that UV should be updated so that smp_call_function_many(),
+ * etc, are optimal on UV.
+ */
 unsigned int cpu;

 cpu = smp_processor_id();
@@ -363,6 +402,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,

 if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
 flush_tlb_others(mm_cpumask(mm), &info);
+
 put_cpu();
 }

@@ -371,8 +411,6 @@ static void do_flush_tlb_all(void *info)
 {
 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 __flush_tlb_all();
- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
- leave_mm(smp_processor_id());
 }

 void flush_tlb_all(void)
@@ -425,6 +463,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)

 if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
 flush_tlb_others(&batch->cpumask, &info);
+
 cpumask_clear(&batch->cpumask);

 put_cpu();
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 5f61b7e2e6b2..ba76f3ce997f 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1005,14 +1005,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 /* Get the "official" set of cpus referring to our pagetable. */
 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
 for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
- && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+ if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
 continue;
 smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
 }
 return;
 }
- cpumask_copy(mask, mm_cpumask(mm));

 /*
 * It's possible that a vcpu may have a stale reference to our
@@ -1021,6 +1019,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 * look at its actual current cr3 value, and force it to flush
 * if needed.
 */
+ cpumask_clear(mask);
 for_each_online_cpu(cpu) {
 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
 cpumask_set_cpu(cpu, mask);
--
2.14.2

@ -1,340 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 24 Jul 2017 21:41:38 -0700
Subject: [PATCH] x86/mm: Implement PCID based optimization: try to preserve
 old TLB entries using PCID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

PCID is a "process context ID" -- it's what other architectures call
an address space ID. Every non-global TLB entry is tagged with a
PCID, only TLB entries that match the currently selected PCID are
used, and we can switch PGDs without flushing the TLB. x86's
PCID is 12 bits.

This is an unorthodox approach to using PCID. x86's PCID is far too
short to uniquely identify a process, and we can't even really
uniquely identify a running process because there are monster
||||
uniquely identify a running process because there are monster
|
||||
systems with over 4096 CPUs. To make matters worse, past attempts
|
||||
to use all 12 PCID bits have resulted in slowdowns instead of
|
||||
speedups.
|
||||
|
||||
This patch uses PCID differently. We use a PCID to identify a
|
||||
recently-used mm on a per-cpu basis. An mm has no fixed PCID
|
||||
binding at all; instead, we give it a fresh PCID each time it's
|
||||
loaded except in cases where we want to preserve the TLB, in which
|
||||
case we reuse a recent value.
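
As a rough illustration (not part of the patch; the names and the flat
round-robin policy here are invented simplifications, and the tlb_gen
staleness check of the real choose_new_asid() in the diff below is
omitted), a minimal user-space model of this per-CPU ASID recycling
could look like:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_DYN_ASIDS 6

/* which mm (identified by its ctx_id) currently owns each slot */
static uint64_t slot_owner[NR_DYN_ASIDS];
static uint16_t next_slot;

static uint16_t choose_asid(uint64_t ctx_id, bool *need_flush)
{
	uint16_t asid;

	/* Reuse a slot we still own: the TLB entries survive. */
	for (asid = 0; asid < NR_DYN_ASIDS; asid++) {
		if (slot_owner[asid] == ctx_id) {
			*need_flush = false;
			return asid;
		}
	}

	/* Otherwise steal the next slot round-robin and flush it. */
	asid = next_slot++ % NR_DYN_ASIDS;
	slot_owner[asid] = ctx_id;
	*need_flush = true;
	return asid;
}

int main(void)
{
	bool flush;
	uint16_t a = choose_asid(42, &flush);

	printf("mm 42 -> asid %u, flush=%d\n", a, flush);
	a = choose_asid(42, &flush);	/* second switch reuses the slot */
	printf("mm 42 -> asid %u, flush=%d\n", a, flush);
	return 0;
}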

Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
(turbo off, intel_pstate requesting max performance) under KVM with
the guest using idle=poll (to avoid artifacts when bouncing between
CPUs). I haven't done any real statistics here -- I just ran them
in a loop and picked the fastest results that didn't look like
outliers. Unpatched means commit a4eb8b993554, so all the
bookkeeping overhead is gone.

ping-pong between two mms on the same CPU using eventfd:

patched: 1.22µs
patched, nopcid: 1.33µs
unpatched: 1.34µs

Same ping-pong, but now touch 512 pages (all zero-page to minimize
cache misses) each iteration. dTLB misses are measured by
dtlb_load_misses.miss_causes_a_walk:

patched: 1.8µs 11M dTLB misses
patched, nopcid: 6.2µs, 207M dTLB misses
unpatched: 6.1µs, 190M dTLB misses

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/9ee75f17a81770feed616358e6860d98a2a5b1e7.1500957502.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 10af6235e0d327d42e1bad974385197817923dc1)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d833a976288cdcf7fb1dabb48ebf614ebf6a311c)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 3 ++
arch/x86/include/asm/processor-flags.h | 2 +
arch/x86/include/asm/tlbflush.h | 18 +++++++-
arch/x86/mm/init.c | 1 +
arch/x86/mm/tlb.c | 84 +++++++++++++++++++++++++---------
5 files changed, 85 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index d6b055b328f2..7ae318c340d9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -298,6 +298,9 @@ static inline unsigned long __get_current_cr3_fast(void)
{
unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);

+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());

diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 79aa2f98398d..791b60199aa4 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -35,6 +35,7 @@
/* Mask off the address space ID bits. */
#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
#define CR3_PCID_MASK 0xFFFull
+#define CR3_NOFLUSH (1UL << 63)
#else
/*
* CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
@@ -42,6 +43,7 @@
*/
#define CR3_ADDR_MASK 0xFFFFFFFFull
#define CR3_PCID_MASK 0ull
+#define CR3_NOFLUSH 0
#endif

#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6397275008db..d23e61dc0640 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -82,6 +82,12 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif

+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in
+ * two cache lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
+
struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@@ -95,6 +101,8 @@ struct tlb_state {
* mode even if we've already switched back to swapper_pg_dir.
*/
struct mm_struct *loaded_mm;
+ u16 loaded_mm_asid;
+ u16 next_asid;

/*
* Access to this CR4 shadow and to H/W CR4 is protected by
@@ -104,7 +112,8 @@ struct tlb_state {

/*
* This is a list of all contexts that might exist in the TLB.
- * Since we don't yet use PCID, there is only one context.
+ * There is one per ASID that we use, and the ASID (what the
+ * CPU calls PCID) is the index into ctxts.
*
* For each context, ctx_id indicates which mm the TLB's user
* entries came from. As an invariant, the TLB will never
@@ -114,8 +123,13 @@ struct tlb_state {
* To be clear, this means that it's legal for the TLB code to
* flush the TLB without updating tlb_gen. This can happen
* (for now, at least) due to paravirt remote flushes.
+ *
+ * NB: context 0 is a bit special, since it's also used by
+ * various bits of init code. This is fine -- code that
+ * isn't aware of PCID will end up harmlessly flushing
+ * context 0.
*/
- struct tlb_context ctxs[1];
+ struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index c86dc071bb10..af5c1ed21d43 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -849,6 +849,7 @@ void __init zone_sizes_init(void)

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.loaded_mm = &init_mm,
+ .next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0982c997d36f..57943b4d8f2e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -30,6 +30,40 @@

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

+static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
+ u16 *new_asid, bool *need_flush)
+{
+ u16 asid;
+
+ if (!static_cpu_has(X86_FEATURE_PCID)) {
+ *new_asid = 0;
+ *need_flush = true;
+ return;
+ }
+
+ for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+ if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
+ next->context.ctx_id)
+ continue;
+
+ *new_asid = asid;
+ *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
+ next_tlb_gen);
+ return;
+ }
+
+ /*
+ * We don't currently own an ASID slot on this CPU.
+ * Allocate a slot.
+ */
+ *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
+ if (*new_asid >= TLB_NR_DYN_ASIDS) {
+ *new_asid = 0;
+ this_cpu_write(cpu_tlbstate.next_asid, 1);
+ }
+ *need_flush = true;
+}
+
void leave_mm(int cpu)
{
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -66,6 +100,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
unsigned cpu = smp_processor_id();
u64 next_tlb_gen;

@@ -85,12 +120,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
/*
* Verify that CR3 is what we think it is. This will catch
* hypothetical buggy code that directly switches to swapper_pg_dir
- * without going through leave_mm() / switch_mm_irqs_off().
+ * without going through leave_mm() / switch_mm_irqs_off() or that
+ * does something like write_cr3(read_cr3_pa()).
*/
- VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));
+ VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));

if (real_prev == next) {
- VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
+ VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);

if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
@@ -107,16 +143,17 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(&next->context.tlb_gen);

- if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
+ if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
+ next_tlb_gen) {
/*
* Ideally, we'd have a flush_tlb() variant that
* takes the known CR3 value as input. This would
* be faster on Xen PV and on hypothetical CPUs
* on which INVPCID is fast.
*/
- this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
+ this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
next_tlb_gen);
- write_cr3(__pa(next->pgd));
+ write_cr3(__pa(next->pgd) | prev_asid);

/*
* This gets called via leave_mm() in the idle path
@@ -134,8 +171,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* are not reflected in tlb_gen.)
*/
} else {
- VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
- next->context.ctx_id);
+ u16 new_asid;
+ bool need_flush;

if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
@@ -162,18 +199,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
cpumask_set_cpu(cpu, mm_cpumask(next));
next_tlb_gen = atomic64_read(&next->context.tlb_gen);

- this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
- this_cpu_write(cpu_tlbstate.loaded_mm, next);
- write_cr3(__pa(next->pgd));
+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

- /*
- * This gets called via leave_mm() in the idle path where RCU
- * functions differently. Tracing normally uses RCU, so we
- * have to call the tracepoint specially here.
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
+ if (need_flush) {
+ this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+ write_cr3(__pa(next->pgd) | new_asid);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
+ } else {
+ /* The new ASID is already up to date. */
+ write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
+ }
+
+ this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
}

load_mm_cr4(next);
@@ -200,13 +241,14 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
* wants us to catch up to.
*/
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
- u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
+ u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);

/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());

- VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);

if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
@@ -294,7 +336,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
}

/* Both paths above update our state to mm_tlb_gen. */
- this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
+ this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}

static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
--
2.14.2

@ -1,176 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:48 -0700
Subject: [PATCH] x86/mm: Factor out CR3-building code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Current, the code that assembles a value to load into CR3 is
open-coded everywhere. Factor it out into helpers build_cr3() and
build_cr3_noflush().

This makes one semantic change: __get_current_cr3_fast() was wrong
on SME systems. No one noticed because the only caller is in the
VMX code, and there are no CPUs with both SME and VMX.
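
For orientation (an illustrative sketch, not part of the patch; the
constants mirror the processor-flags.h definitions quoted earlier, and
the physical address is a hypothetical value), the CR3 layout these
helpers encode can be modeled in user space as:

#include <stdint.h>
#include <stdio.h>

#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
#define CR3_PCID_MASK 0xFFFull
#define CR3_NOFLUSH (1ull << 63)

/* page-table base in the address bits, ASID in the 12 PCID bits */
static uint64_t build_cr3(uint64_t pgd_pa, uint16_t asid)
{
	return (pgd_pa & CR3_ADDR_MASK) | (asid & CR3_PCID_MASK);
}

/* bit 63 asks the CPU to keep the old PCID's TLB entries */
static uint64_t build_cr3_noflush(uint64_t pgd_pa, uint16_t asid)
{
	return build_cr3(pgd_pa, asid) | CR3_NOFLUSH;
}

int main(void)
{
	uint64_t pgd_pa = 0x1234000;	/* hypothetical page-table base */

	printf("flush:    %#jx\n", (uintmax_t)build_cr3(pgd_pa, 3));
	printf("no flush: %#jx\n", (uintmax_t)build_cr3_noflush(pgd_pa, 3));
	return 0;
}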

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 15 ++++++---
arch/x86/mm/tlb.c | 68 +++++++++++++++++++++++++++++++++++---
2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..a999ba6b721f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}

+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+ return __sme_pa(mm->pgd) | asid;
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+ return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+}

/*
* This can be used from process context to figure out what the value of
@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
*/
static inline unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
- if (static_cpu_has(X86_FEATURE_PCID))
- cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid));

/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 57943b4d8f2e..440400316c8a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* without going through leave_mm() / switch_mm_irqs_off() or that
* does something like write_cr3(read_cr3_pa()).
*/
- VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+ /*
+ * If we were to BUG here, we'd be very likely to kill
+ * the system so hard that we don't see the call trace.
+ * Try to recover instead by ignoring the error and doing
+ * a global flush to minimize the chance of corruption.
+ *
+ * (This is far from being a fully correct recovery.
+ * Architecturally, the CPU could prefetch something
+ * back into an incorrect ASID slot and leave it there
+ * to cause trouble down the road. It's better than
+ * nothing, though.)
+ */
+ __flush_tlb_all();
+ }
+#endif

if (real_prev == next) {
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
@@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
next_tlb_gen);
- write_cr3(__pa(next->pgd) | prev_asid);
+ write_cr3(build_cr3(next, prev_asid));

/*
* This gets called via leave_mm() in the idle path
@@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(__pa(next->pgd) | new_asid);
+ write_cr3(build_cr3(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+ write_cr3(build_cr3_noflush(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}

@@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_ldt(real_prev, next);
}

+/*
+ * Call this when reinitializing a CPU. It fixes the following potential
+ * problems:
+ *
+ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
+ * because the CPU was taken down and came back up with CR3's PCID
+ * bits clear. CPU hotplug can do this.
+ *
+ * - The TLB contains junk in slots corresponding to inactive ASIDs.
+ *
+ * - The CPU went so far out to lunch that it may have missed a TLB
+ * flush.
+ */
+void initialize_tlbstate_and_flush(void)
+{
+ int i;
+ struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
+ unsigned long cr3 = __read_cr3();
+
+ /* Assert that CR3 already references the right mm. */
+ WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
+
+ /*
+ * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
+ * doesn't work like other CR4 bits because it can only be set from
+ * long mode.)
+ */
+ WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
+ !(cr4_read_shadow() & X86_CR4_PCIDE));
+
+ /* Force ASID 0 and force a TLB flush. */
+ write_cr3(build_cr3(mm, 0));
+
+ /* Reinitialize tlbstate. */
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+ this_cpu_write(cpu_tlbstate.next_asid, 1);
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+
+ for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
+ this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
+}
+
/*
* flush_tlb_func_common()'s memory ordering requirement is that any
* TLB fills that happen after we flush the TLB are ordered after we
--
2.14.2

@ -1,85 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:49 -0700
Subject: [PATCH] x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Putting the logical ASID into CR3's PCID bits directly means that we
have two cases to consider separately: ASID == 0 and ASID != 0.
This means that bugs that only hit in one of these cases trigger
nondeterministically.

There were some bugs like this in the past, and I think there's
still one in current kernels. In particular, we have a number of
ASID-unware code paths that save CR3, write some special value, and
then restore CR3. This includes suspend/resume, hibernate, kexec,
EFI, and maybe other things I've missed. This is currently
dangerous: if ASID != 0, then this code sequence will leave garbage
in the TLB tagged for ASID 0. We could potentially see corruption
when switching back to ASID 0. In principle, an
initialize_tlbstate_and_flush() call after these sequences would
solve the problem, but EFI, at least, does not call this. (And it
probably shouldn't -- initialize_tlbstate_and_flush() is rather
expensive.)
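
To make the new invariant concrete (an illustrative sketch, not part
of the patch; the function name is invented), the kernel-ASID to
hardware-PCID mapping the diff below introduces is simply:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Hardware PCID 0 stays reserved for PCID-unaware code. */
static uint16_t kern_asid_to_hw_pcid(uint16_t asid)
{
	assert(asid <= 4094);	/* 4095 would overflow the 12-bit PCID */
	return asid + 1;
}

int main(void)
{
	uint16_t asid;

	for (asid = 0; asid < 6; asid++)
		printf("kernel ASID %u -> hardware PCID %u\n",
		       asid, kern_asid_to_hw_pcid(asid));
	return 0;
}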

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/cdc14bbe5d3c3ef2a562be09a6368ffe9bd947a6.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 52a2af400c1075219b3f0ce5c96fc961da44018a)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 15e474753e66e44da1365049f465427053a453ba)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index a999ba6b721f..c120b5db178a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,14 +286,31 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}

+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits. This serves two purposes. It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero. It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
{
- return __sme_pa(mm->pgd) | asid;
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(mm->pgd);
+ }
}

static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
{
- return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
}

/*
--
2.14.2

@ -1,401 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 9 Oct 2017 09:50:49 -0700
Subject: [PATCH] x86/mm: Flush more aggressively in lazy TLB mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Since commit:

94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking")

x86's lazy TLB mode has been all the way lazy: when running a kernel thread
(including the idle thread), the kernel keeps using the last user mm's
page tables without attempting to maintain user TLB coherence at all.

From a pure semantic perspective, this is fine -- kernel threads won't
attempt to access user pages, so having stale TLB entries doesn't matter.

Unfortunately, I forgot about a subtlety. By skipping TLB flushes,
we also allow any paging-structure caches that may exist on the CPU
to become incoherent. This means that we can have a
paging-structure cache entry that references a freed page table, and
the CPU is within its rights to do a speculative page walk starting
at the freed page table.

I can imagine this causing two different problems:

- A speculative page walk starting from a bogus page table could read
IO addresses. I haven't seen any reports of this causing problems.

- A speculative page walk that involves a bogus page table can install
garbage in the TLB. Such garbage would always be at a user VA, but
some AMD CPUs have logic that triggers a machine check when it notices
these bogus entries. I've seen a couple reports of this.

Boris further explains the failure mode:

> It is actually more of an optimization which assumes that paging-structure
> entries are in WB DRAM:
>
> "TlbCacheDis: cacheable memory disable. Read-write. 0=Enables
> performance optimization that assumes PML4, PDP, PDE, and PTE entries
> are in cacheable WB-DRAM; memory type checks may be bypassed, and
> addresses outside of WB-DRAM may result in undefined behavior or NB
> protocol errors. 1=Disables performance optimization and allows PML4,
> PDP, PDE and PTE entries to be in any memory type. Operating systems
> that maintain page tables in memory types other than WB- DRAM must set
> TlbCacheDis to insure proper operation."
>
> The MCE generated is an NB protocol error to signal that
>
> "Link: A specific coherent-only packet from a CPU was issued to an
> IO link. This may be caused by software which addresses page table
> structures in a memory type other than cacheable WB-DRAM without
> properly configuring MSRC001_0015[TlbCacheDis]. This may occur, for
> example, when page table structure addresses are above top of memory. In
> such cases, the NB will generate an MCE if it sees a mismatch between
> the memory operation generated by the core and the link type."
>
> I'm assuming coherent-only packets don't go out on IO links, thus the
> error.

To fix this, reinstate TLB coherence in lazy mode. With this patch
applied, we do it in one of two ways:

- If we have PCID, we simply switch back to init_mm's page tables
when we enter a kernel thread -- this seems to be quite cheap
except for the cost of serializing the CPU.

- If we don't have PCID, then we set a flag and switch to init_mm
the first time we would otherwise need to flush the TLB.

The /sys/kernel/debug/x86/tlb_use_lazy_mode debug switch can be changed
to override the default mode for benchmarking.

In theory, we could optimize this better by only flushing the TLB in
lazy CPUs when a page table is freed. Doing that would require
auditing the mm code to make sure that all page table freeing goes
through tlb_remove_page() as well as reworking some data structures
to implement the improved flush logic.
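
A compact model of that two-way decision (illustrative only, not part
of the patch; the names are invented and the real code paths are
enter_lazy_tlb() and flush_tlb_func_common() in the diff below) is:

#include <stdbool.h>
#include <stdio.h>

struct cpu_state {
	bool has_pcid;
	bool is_lazy;
	const char *loaded_mm;
};

/* Entering a kernel thread: switch now (PCID) or go lazy (no PCID). */
static void enter_kernel_thread(struct cpu_state *cpu)
{
	if (cpu->has_pcid) {
		cpu->loaded_mm = "init_mm";
		cpu->is_lazy = false;
	} else {
		cpu->is_lazy = true;
	}
}

/* The first flush aimed at a lazy CPU forces the deferred switch. */
static void flush_request(struct cpu_state *cpu)
{
	if (cpu->is_lazy) {
		cpu->loaded_mm = "init_mm";
		cpu->is_lazy = false;
	}
}

int main(void)
{
	struct cpu_state cpu = { .has_pcid = false, .loaded_mm = "user_mm" };

	enter_kernel_thread(&cpu);
	printf("after idle entry: mm=%s lazy=%d\n", cpu.loaded_mm, cpu.is_lazy);
	flush_request(&cpu);
	printf("after flush: mm=%s lazy=%d\n", cpu.loaded_mm, cpu.is_lazy);
	return 0;
}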

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Reported-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Johannes Hirte <johannes.hirte@datenkhaos.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking")
Link: http://lkml.kernel.org/r/20171009170231.fkpraqokz6e4zeco@pd.tnic
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit b956575bed91ecfb136a8300742ecbbf451471ab)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a4bb9409c548ece51ec246fc5113a32b8d130142)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 8 +-
arch/x86/include/asm/tlbflush.h | 24 ++++++
arch/x86/mm/tlb.c | 160 +++++++++++++++++++++++++------------
3 files changed, 136 insertions(+), 56 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c120b5db178a..3c856a15b98e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
DEBUG_LOCKS_WARN_ON(preemptible());
}

-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
- int cpu = smp_processor_id();
-
- if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
- cpumask_clear_cpu(cpu, mm_cpumask(mm));
-}
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index d23e61dc0640..6533da3036c9 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -82,6 +82,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif

+/*
+ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
+ * to init_mm when we switch to a kernel thread (e.g. the idle thread). If
+ * it's false, then we immediately switch CR3 when entering a kernel thread.
+ */
+DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
/*
* 6 because 6 should be plenty and struct tlb_state will fit in
* two cache lines.
@@ -104,6 +111,23 @@ struct tlb_state {
u16 loaded_mm_asid;
u16 next_asid;

+ /*
+ * We can be in one of several states:
+ *
+ * - Actively using an mm. Our CPU's bit will be set in
+ * mm_cpumask(loaded_mm) and is_lazy == false;
+ *
+ * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
+ * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+ *
+ * - Lazily using a real mm. loaded_mm != &init_mm, our bit
+ * is set in mm_cpumask(loaded_mm), but is_lazy == true.
+ * We're heuristically guessing that the CR3 load we
+ * skipped more than makes up for the overhead added by
+ * lazy mode.
+ */
+ bool is_lazy;
+
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 440400316c8a..b27aceaf7ed1 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -30,6 +30,8 @@

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

+DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
u16 *new_asid, bool *need_flush)
{
@@ -80,7 +82,7 @@ void leave_mm(int cpu)
return;

/* Warn if we're not lazy. */
- WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
+ WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));

switch_mm(NULL, &init_mm, NULL);
}
@@ -140,52 +142,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
__flush_tlb_all();
}
#endif
+ this_cpu_write(cpu_tlbstate.is_lazy, false);

if (real_prev == next) {
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);

- if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
- /*
- * There's nothing to do: we weren't lazy, and we
- * aren't changing our mm. We don't need to flush
- * anything, nor do we need to update CR3, CR4, or
- * LDTR.
- */
- return;
- }
-
- /* Resume remote flushes and then read tlb_gen. */
- cpumask_set_cpu(cpu, mm_cpumask(next));
- next_tlb_gen = atomic64_read(&next->context.tlb_gen);
-
- if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
- next_tlb_gen) {
- /*
- * Ideally, we'd have a flush_tlb() variant that
- * takes the known CR3 value as input. This would
- * be faster on Xen PV and on hypothetical CPUs
- * on which INVPCID is fast.
- */
- this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
- next_tlb_gen);
- write_cr3(build_cr3(next, prev_asid));
-
- /*
- * This gets called via leave_mm() in the idle path
- * where RCU functions differently. Tracing normally
- * uses RCU, so we have to call the tracepoint
- * specially here.
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
- }
-
/*
- * We just exited lazy mode, which means that CR4 and/or LDTR
- * may be stale. (Changes to the required CR4 and LDTR states
- * are not reflected in tlb_gen.)
+ * We don't currently support having a real mm loaded without
+ * our cpu set in mm_cpumask(). We have all the bookkeeping
+ * in place to figure out whether we would need to flush
+ * if our cpu were cleared in mm_cpumask(), but we don't
+ * currently use it.
*/
+ if (WARN_ON_ONCE(real_prev != &init_mm &&
+ !cpumask_test_cpu(cpu, mm_cpumask(next))))
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ return;
} else {
u16 new_asid;
bool need_flush;
@@ -204,10 +178,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
}

/* Stop remote flushes for the previous mm */
- if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
- cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
-
- VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+ real_prev != &init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

/*
* Start remote flushes and then read tlb_gen.
@@ -237,6 +210,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_ldt(real_prev, next);
}

+/*
+ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
+ * kernel thread or other context without an mm. Acceptable implementations
+ * include doing nothing whatsoever, switching to init_mm, or various clever
+ * lazy tricks to try to minimize TLB flushes.
+ *
+ * The scheduler reserves the right to call enter_lazy_tlb() several times
+ * in a row. It will notify us that we're going back to a real mm by
+ * calling switch_mm_irqs_off().
+ */
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
+ return;
+
+ if (static_branch_unlikely(&tlb_use_lazy_mode)) {
+ /*
+ * There's a significant optimization that may be possible
+ * here. We have accurate enough TLB flush tracking that we
+ * don't need to maintain coherence of TLB per se when we're
+ * lazy. We do, however, need to maintain coherence of
+ * paging-structure caches. We could, in principle, leave our
+ * old mm loaded and only switch to init_mm when
+ * tlb_remove_page() happens.
+ */
+ this_cpu_write(cpu_tlbstate.is_lazy, true);
+ } else {
+ switch_mm(NULL, &init_mm, NULL);
+ }
+}
+
/*
* Call this when reinitializing a CPU. It fixes the following potential
* problems:
@@ -308,16 +312,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());

+ if (unlikely(loaded_mm == &init_mm))
+ return;
+
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);

- if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+ if (this_cpu_read(cpu_tlbstate.is_lazy)) {
/*
- * We're in lazy mode -- don't flush. We can get here on
- * remote flushes due to races and on local flushes if a
- * kernel thread coincidentally flushes the mm it's lazily
- * still using.
+ * We're in lazy mode. We need to at least flush our
+ * paging-structure cache to avoid speculatively reading
+ * garbage into our TLB. Since switching to init_mm is barely
+ * slower than a minimal flush, just switch to init_mm.
*/
+ switch_mm_irqs_off(NULL, &init_mm, NULL);
return;
}

@@ -616,3 +624,57 @@ static int __init create_tlb_single_page_flush_ceiling(void)
return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
+
+static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buf[2];
+
+ buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
+ buf[1] = '\n';
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t tlblazy_write_file(struct file *file,
+ const char __user *user_buf, size_t count, loff_t *ppos)
+{
+ bool val;
+
+ if (kstrtobool_from_user(user_buf, count, &val))
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&tlb_use_lazy_mode);
+ else
+ static_branch_disable(&tlb_use_lazy_mode);
+
+ return count;
+}
+
+static const struct file_operations fops_tlblazy = {
+ .read = tlblazy_read_file,
+ .write = tlblazy_write_file,
+ .llseek = default_llseek,
+};
+
+static int __init init_tlb_use_lazy_mode(void)
+{
+ if (boot_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * Heuristic: with PCID on, switching to and from
+ * init_mm is reasonably fast, but remote flush IPIs
+ * as expensive as ever, so turn off lazy TLB mode.
+ *
+ * We can't do this in setup_pcid() because static keys
+ * haven't been initialized yet, and it would blow up
+ * badly.
+ */
+ static_branch_disable(&tlb_use_lazy_mode);
+ }
+
+ debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
+ arch_debugfs_dir, NULL, &fops_tlblazy);
+ return 0;
+}
+late_initcall(init_tlb_use_lazy_mode);
--
2.14.2

@ -1,101 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sat, 4 Nov 2017 04:16:12 -0700
Subject: [PATCH] Revert "x86/mm: Stop calling leave_mm() in idle code"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This reverts commit 43858b4f25cf0adc5c2ca9cf5ce5fdf2532941e5.

The reason I removed the leave_mm() calls in question is because the
heuristic wasn't needed after that patch. With the original version
of my PCID series, we never flushed a "lazy cpu" (i.e. a CPU running
kernel thread) due a flush on the loaded mm.

Unfortunately, that caused architectural issues, so now I've
reinstated these flushes on non-PCID systems in:

commit b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode").

That, in turn, gives us a power management and occasionally
performance regression as compared to old kernels: a process that
goes into a deep idle state on a given CPU and gets its mm flushed
due to activity on a different CPU will wake the idle CPU.

Reinstate the old ugly heuristic: if a CPU goes into ACPI C3 or an
intel_idle state that is likely to cause a TLB flush gets its mm
switched to init_mm before going idle.

FWIW, this heuristic is lousy. Whether we should change CR3 before
idle isn't a good hint except insofar as the performance hit is a bit
lower if the TLB is getting flushed by the idle code anyway. What we
really want to know is whether we anticipate being idle long enough
that the mm is likely to be flushed before we wake up. This is more a
matter of the expected latency than the idle state that gets chosen.
This heuristic also completely fails on systems that don't know
whether the TLB will be flushed (e.g. AMD systems?). OTOH it may be a
bit obsolete anyway -- PCID systems don't presently benefit from this
heuristic at all.

We also shouldn't do this callback from innermost bit of the idle code
due to the RCU nastiness it causes. All the information need is
available before rcu_idle_enter() needs to happen.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 43858b4f25cf "x86/mm: Stop calling leave_mm() in idle code"
Link: http://lkml.kernel.org/r/c513bbd4e653747213e05bc7062de000bf0202a5.1509793738.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 675357362aeba19688440eb1aaa7991067f73b12)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b607843145fd0593fcd87e2596d1dc5a1d5f79a5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/mm/tlb.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index b27aceaf7ed1..ed06f1593390 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -194,12 +194,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
write_cr3(build_cr3(next, new_asid));
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
+
+ /*
+ * NB: This gets called via leave_mm() in the idle path
+ * where RCU functions differently. Tracing normally
+ * uses RCU, so we need to use the _rcuidle variant.
+ *
+ * (There is no good reason for this. The idle code should
+ * be rearranged to call this before rcu_idle_enter().)
+ */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
write_cr3(build_cr3_noflush(next, new_asid));
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
+
+ /* See above wrt _rcuidle. */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}

this_cpu_write(cpu_tlbstate.loaded_mm, next);
--
2.14.2

@ -1,85 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 3 Oct 2017 08:51:43 -0500
Subject: [PATCH] kprobes/x86: Set up frame pointer in kprobe trampoline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Richard Weinberger saw an unwinder warning when running bcc's opensnoop:

WARNING: kernel stack frame pointer at ffff99ef4076bea0 in opensnoop:2008 has bad value 0000000000000008
unwind stack type:0 next_sp: (null) mask:0x2 graph_idx:0
...
ffff99ef4076be88: ffff99ef4076bea0 (0xffff99ef4076bea0)
ffff99ef4076be90: ffffffffac442721 (optimized_callback +0x81/0x90)
...

A lockdep stack trace was initiated from inside a kprobe handler, when
the unwinder noticed a bad frame pointer on the stack. The bad frame
pointer is related to the fact that the kprobe optprobe trampoline
doesn't save the frame pointer before calling into optimized_callback().
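
For reference (an illustrative sketch, not part of the patch), the
two-instruction frame-pointer prologue the unwinder expects -- and
which the trampoline was performing only half of -- looks like this on
x86-64, here written as GCC inline assembly:

void example_prologue(void)
{
	asm volatile("push %%rbp\n\t"	/* save the caller's frame pointer */
		     "mov %%rsp, %%rbp\n\t"	/* link our frame into the chain */
		     "pop %%rbp"	/* restore it before returning */
		     ::: "memory");
}

int main(void)
{
	example_prologue();
	return 0;
}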

Reported-and-tested-by: Richard Weinberger <richard@sigma-star.at>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S . Miller <davem@davemloft.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/7aef2f8ecd75c2f505ef9b80490412262cf4a44c.1507038547.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit ee213fc72fd67d0988525af501534f4cb924d1e9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0f7d5518c91335584b16c7bed1c54c10b78ea76a)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/kprobes/common.h | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index db2182d63ed0..3fc0f9a794cb 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -3,6 +3,15 @@

/* Kprobes and Optprobes common header */

+#include <asm/asm.h>
+
+#ifdef CONFIG_FRAME_POINTER
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
+ " mov %" _ASM_SP ", %" _ASM_BP "\n"
+#else
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
+#endif
+
#ifdef CONFIG_X86_64
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
@@ -17,7 +26,7 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- " pushq %rbp\n" \
+ SAVE_RBP_STRING \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
@@ -48,7 +57,7 @@
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- " pushl %ebp\n" \
+ SAVE_RBP_STRING \
" pushl %edi\n" \
" pushl %esi\n" \
" pushl %edx\n" \
--
2.14.2

@ -1,139 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Aug 2017 08:47:21 +0200
Subject: [PATCH] x86/tracing: Introduce a static key for exception tracing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Switching the IDT just for avoiding tracepoints creates a completely
impenetrable macro/inline/ifdef mess.

There is no point in avoiding tracepoints for most of the traps/exceptions.
For the more expensive tracepoints, like pagefaults, this can be handled with
an explicit static key.

Preparatory patch to remove the tracing IDT.
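
The pattern this builds on (an illustrative sketch, not part of the
patch; the pagefault key and hook names here are invented, while the
jump-label API itself is the one used in the diff below) is:

#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(trace_pagefault_key);

static void do_trace_page_fault(unsigned long address)
{
	/* hypothetical tracepoint body */
}

/* called from the tracepoint's regfunc/unregfunc */
void pf_trace_register(void)
{
	static_branch_inc(&trace_pagefault_key);
}

void pf_trace_unregister(void)
{
	static_branch_dec(&trace_pagefault_key);
}

void handle_page_fault(unsigned long address)
{
	/* compiled to a NOP until the first tracer registers */
	if (static_branch_unlikely(&trace_pagefault_key))
		do_trace_page_fault(address);
}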

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170828064956.593094539@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 2feb1b316d48004d905278c02a55902cab0be8be)
Signed-off-by: Andy Whitcroft <apw@kathleen.maas>
(cherry picked from commit 15e0ff2a63fdd93f8881e2ebba5c048c5b601e57)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d58a56e851c339d8d9d311dc9b4fad6abbf8bf19)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/trace/common.h | 15 +++++++++++++++
arch/x86/include/asm/trace/exceptions.h | 4 +---
arch/x86/include/asm/trace/irq_vectors.h | 4 +---
arch/x86/kernel/tracepoint.c | 9 ++++++++-
4 files changed, 25 insertions(+), 7 deletions(-)
create mode 100644 arch/x86/include/asm/trace/common.h

diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h
new file mode 100644
index 000000000000..b1eb7b18ee8a
--- /dev/null
+++ b/arch/x86/include/asm/trace/common.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_TRACE_COMMON_H
+#define _ASM_TRACE_COMMON_H
+
+extern int trace_irq_vector_regfunc(void);
+extern void trace_irq_vector_unregfunc(void);
+
+#ifdef CONFIG_TRACING
+DECLARE_STATIC_KEY_FALSE(trace_irqvectors_key);
+#define trace_irqvectors_enabled() \
+ static_branch_unlikely(&trace_irqvectors_key)
+#else
+static inline bool trace_irqvectors_enabled(void) { return false; }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 2422b14c50a7..960a5b50ac3b 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -5,9 +5,7 @@
#define _TRACE_PAGE_FAULT_H

#include <linux/tracepoint.h>
-
-extern int trace_irq_vector_regfunc(void);
-extern void trace_irq_vector_unregfunc(void);
+#include <asm/trace/common.h>

DECLARE_EVENT_CLASS(x86_exceptions,

diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 32dd6a9e343c..7825b4426e7e 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -5,9 +5,7 @@
#define _TRACE_IRQ_VECTORS_H

#include <linux/tracepoint.h>
-
-extern int trace_irq_vector_regfunc(void);
-extern void trace_irq_vector_unregfunc(void);
+#include <asm/trace/common.h>

DECLARE_EVENT_CLASS(x86_irq_vector,

diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index 15515132bf0d..dd4aa04bb95c 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -4,9 +4,11 @@
* Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
*
*/
+#include <linux/jump_label.h>
+#include <linux/atomic.h>
+
#include <asm/hw_irq.h>
#include <asm/desc.h>
-#include <linux/atomic.h>

atomic_t trace_idt_ctr = ATOMIC_INIT(0);
struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
@@ -15,6 +17,7 @@ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
/* No need to be aligned, but done to keep all IDTs defined the same way. */
gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;

+DEFINE_STATIC_KEY_FALSE(trace_irqvectors_key);
static int trace_irq_vector_refcount;
static DEFINE_MUTEX(irq_vector_mutex);

@@ -36,6 +39,8 @@ static void switch_idt(void *arg)

int trace_irq_vector_regfunc(void)
{
+ static_branch_inc(&trace_irqvectors_key);
+
mutex_lock(&irq_vector_mutex);
if (!trace_irq_vector_refcount) {
set_trace_idt_ctr(1);
@@ -49,6 +54,8 @@ int trace_irq_vector_regfunc(void)

void trace_irq_vector_unregfunc(void)
{
+ static_branch_dec(&trace_irqvectors_key);
+
mutex_lock(&irq_vector_mutex);
trace_irq_vector_refcount--;
if (!trace_irq_vector_refcount) {
--
2.14.2

@ -1,189 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 Jul 2017 16:10:33 -0500
Subject: [PATCH] x86/boot: Add early cmdline parsing for options with
 arguments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add a cmdline_find_option() function to look for cmdline options that
take arguments. The argument is returned in a supplied buffer and the
|
||||
argument length (regardless of whether it fits in the supplied buffer)
|
||||
is returned, with -1 indicating not found.
|
||||
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Alexander Potapenko <glider@google.com>
|
||||
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Arnd Bergmann <arnd@arndb.de>
|
||||
Cc: Borislav Petkov <bp@alien8.de>
|
||||
Cc: Brijesh Singh <brijesh.singh@amd.com>
|
||||
Cc: Dave Young <dyoung@redhat.com>
|
||||
Cc: Dmitry Vyukov <dvyukov@google.com>
|
||||
Cc: Jonathan Corbet <corbet@lwn.net>
|
||||
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
Cc: Larry Woodman <lwoodman@redhat.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Matt Fleming <matt@codeblueprint.co.uk>
|
||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Radim Krčmář <rkrcmar@redhat.com>
|
||||
Cc: Rik van Riel <riel@redhat.com>
|
||||
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
|
||||
Cc: kasan-dev@googlegroups.com
|
||||
Cc: kvm@vger.kernel.org
|
||||
Cc: linux-arch@vger.kernel.org
|
||||
Cc: linux-doc@vger.kernel.org
|
||||
Cc: linux-efi@vger.kernel.org
|
||||
Cc: linux-mm@kvack.org
|
||||
Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit e505371dd83963caae1a37ead9524e8d997341be)
|
||||
Signed-off-by: Andy Whitcroft <apw@kathleen.maas>
|
||||
(cherry picked from commit 37569cd003aa69a57d5666530436c2d973a57b26)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit b9f03418aa9b8ecbb1c7f32ac2bfe68fd21de4f5)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/include/asm/cmdline.h | 2 +
|
||||
arch/x86/lib/cmdline.c | 105 +++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 107 insertions(+)
|
||||
|
||||
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
|
||||
index e01f7f7ccb0c..84ae170bc3d0 100644
|
||||
--- a/arch/x86/include/asm/cmdline.h
|
||||
+++ b/arch/x86/include/asm/cmdline.h
|
||||
@@ -2,5 +2,7 @@
|
||||
#define _ASM_X86_CMDLINE_H
|
||||
|
||||
int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
|
||||
+int cmdline_find_option(const char *cmdline_ptr, const char *option,
|
||||
+ char *buffer, int bufsize);
|
||||
|
||||
#endif /* _ASM_X86_CMDLINE_H */
|
||||
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
|
||||
index 5cc78bf57232..3261abb21ef4 100644
|
||||
--- a/arch/x86/lib/cmdline.c
|
||||
+++ b/arch/x86/lib/cmdline.c
|
||||
@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
|
||||
return 0; /* Buffer overrun */
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Find a non-boolean option (i.e. option=argument). In accordance with
|
||||
+ * standard Linux practice, if this option is repeated, this returns the
|
||||
+ * last instance on the command line.
|
||||
+ *
|
||||
+ * @cmdline: the cmdline string
|
||||
+ * @max_cmdline_size: the maximum size of cmdline
|
||||
+ * @option: option string to look for
|
||||
+ * @buffer: memory buffer to return the option argument
|
||||
+ * @bufsize: size of the supplied memory buffer
|
||||
+ *
|
||||
+ * Returns the length of the argument (regardless of if it was
|
||||
+ * truncated to fit in the buffer), or -1 on not found.
|
||||
+ */
|
||||
+static int
|
||||
+__cmdline_find_option(const char *cmdline, int max_cmdline_size,
|
||||
+ const char *option, char *buffer, int bufsize)
|
||||
+{
|
||||
+ char c;
|
||||
+ int pos = 0, len = -1;
|
||||
+ const char *opptr = NULL;
|
||||
+ char *bufptr = buffer;
|
||||
+ enum {
|
||||
+ st_wordstart = 0, /* Start of word/after whitespace */
|
||||
+ st_wordcmp, /* Comparing this word */
|
||||
+ st_wordskip, /* Miscompare, skip */
|
||||
+ st_bufcpy, /* Copying this to buffer */
|
||||
+ } state = st_wordstart;
|
||||
+
|
||||
+ if (!cmdline)
|
||||
+ return -1; /* No command line */
|
||||
+
|
||||
+ /*
|
||||
+ * This 'pos' check ensures we do not overrun
|
||||
+ * a non-NULL-terminated 'cmdline'
|
||||
+ */
|
||||
+ while (pos++ < max_cmdline_size) {
|
||||
+ c = *(char *)cmdline++;
|
||||
+ if (!c)
|
||||
+ break;
|
||||
+
|
||||
+ switch (state) {
|
||||
+ case st_wordstart:
|
||||
+ if (myisspace(c))
|
||||
+ break;
|
||||
+
|
||||
+ state = st_wordcmp;
|
||||
+ opptr = option;
|
||||
+ /* fall through */
|
||||
+
|
||||
+ case st_wordcmp:
|
||||
+ if ((c == '=') && !*opptr) {
|
||||
+ /*
|
||||
+ * We matched all the way to the end of the
|
||||
+ * option we were looking for, prepare to
|
||||
+ * copy the argument.
|
||||
+ */
|
||||
+ len = 0;
|
||||
+ bufptr = buffer;
|
||||
+ state = st_bufcpy;
|
||||
+ break;
|
||||
+ } else if (c == *opptr++) {
|
||||
+ /*
|
||||
+ * We are currently matching, so continue
|
||||
+ * to the next character on the cmdline.
|
||||
+ */
|
||||
+ break;
|
||||
+ }
|
||||
+ state = st_wordskip;
|
||||
+ /* fall through */
|
||||
+
|
||||
+ case st_wordskip:
|
||||
+ if (myisspace(c))
|
||||
+ state = st_wordstart;
|
||||
+ break;
|
||||
+
|
||||
+ case st_bufcpy:
|
||||
+ if (myisspace(c)) {
|
||||
+ state = st_wordstart;
|
||||
+ } else {
|
||||
+ /*
|
||||
+ * Increment len, but don't overrun the
|
||||
+ * supplied buffer and leave room for the
|
||||
+ * NULL terminator.
|
||||
+ */
|
||||
+ if (++len < bufsize)
|
||||
+ *bufptr++ = c;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (bufsize)
|
||||
+ *bufptr = '\0';
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
int cmdline_find_option_bool(const char *cmdline, const char *option)
|
||||
{
|
||||
return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
|
||||
}
|
||||
+
|
||||
+int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
|
||||
+ int bufsize)
|
||||
+{
|
||||
+ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
|
||||
+ buffer, bufsize);
|
||||
+}
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,192 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
|
||||
Date: Sat, 9 Sep 2017 00:56:03 +0300
|
||||
Subject: [PATCH] mm, x86/mm: Fix performance regression in
|
||||
get_user_pages_fast()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
The 0-day test bot found a performance regression that was tracked down to
|
||||
switching x86 to the generic get_user_pages_fast() implementation:
|
||||
|
||||
http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop
|
||||
|
||||
The regression was caused by the fact that we now use local_irq_save() +
|
||||
local_irq_restore() in get_user_pages_fast() to disable interrupts.
|
||||
In x86 implementation local_irq_disable() + local_irq_enable() was used.
|
||||
|
||||
The fix is to make get_user_pages_fast() use local_irq_disable(),
|
||||
leaving local_irq_save() for __get_user_pages_fast() that can be called
|
||||
with interrupts disabled.
|
||||
|
||||
Numbers for pinning a gigabyte of memory, one page a time, 20 repeats:
|
||||
|
||||
Before: Average: 14.91 ms, stddev: 0.45 ms
|
||||
After: Average: 10.76 ms, stddev: 0.18 ms
|
||||
|
||||
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
|
||||
Cc: Andrew Morton <akpm@linux-foundation.org>
|
||||
Cc: Huang Ying <ying.huang@intel.com>
|
||||
Cc: Jonathan Corbet <corbet@lwn.net>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
|
||||
Cc: linux-mm@kvack.org
|
||||
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
|
||||
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 5241f4b2c68284612e34910305f3234e4a64701b)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
mm/gup.c | 97 ++++++++++++++++++++++++++++++++++++++--------------------------
|
||||
1 file changed, 58 insertions(+), 39 deletions(-)
|
||||
|
||||
diff --git a/mm/gup.c b/mm/gup.c
|
||||
index 23f01c40c88f..4a789f1c6a27 100644
|
||||
--- a/mm/gup.c
|
||||
+++ b/mm/gup.c
|
||||
@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
||||
return 1;
|
||||
}
|
||||
|
||||
+static void gup_pgd_range(unsigned long addr, unsigned long end,
|
||||
+ int write, struct page **pages, int *nr)
|
||||
+{
|
||||
+ unsigned long next;
|
||||
+ pgd_t *pgdp;
|
||||
+
|
||||
+ pgdp = pgd_offset(current->mm, addr);
|
||||
+ do {
|
||||
+ pgd_t pgd = READ_ONCE(*pgdp);
|
||||
+
|
||||
+ next = pgd_addr_end(addr, end);
|
||||
+ if (pgd_none(pgd))
|
||||
+ return;
|
||||
+ if (unlikely(pgd_huge(pgd))) {
|
||||
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
|
||||
+ pages, nr))
|
||||
+ return;
|
||||
+ } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
|
||||
+ if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
|
||||
+ PGDIR_SHIFT, next, write, pages, nr))
|
||||
+ return;
|
||||
+ } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
|
||||
+ return;
|
||||
+ } while (pgdp++, addr = next, addr != end);
|
||||
+}
|
||||
+
|
||||
+#ifndef gup_fast_permitted
|
||||
+/*
|
||||
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
|
||||
+ * we need to fall back to the slow version:
|
||||
+ */
|
||||
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
|
||||
+{
|
||||
+ unsigned long len, end;
|
||||
+
|
||||
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
+ end = start + len;
|
||||
+ return end >= start;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
|
||||
* the regular GUP. It will only return non-negative values.
|
||||
@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
||||
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
- struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
- unsigned long next, flags;
|
||||
- pgd_t *pgdp;
|
||||
+ unsigned long flags;
|
||||
int nr = 0;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
@@ -1652,45 +1691,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
* block IPIs that come from THPs splitting.
|
||||
*/
|
||||
|
||||
- local_irq_save(flags);
|
||||
- pgdp = pgd_offset(mm, addr);
|
||||
- do {
|
||||
- pgd_t pgd = READ_ONCE(*pgdp);
|
||||
-
|
||||
- next = pgd_addr_end(addr, end);
|
||||
- if (pgd_none(pgd))
|
||||
- break;
|
||||
- if (unlikely(pgd_huge(pgd))) {
|
||||
- if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
|
||||
- pages, &nr))
|
||||
- break;
|
||||
- } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
|
||||
- if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
|
||||
- PGDIR_SHIFT, next, write, pages, &nr))
|
||||
- break;
|
||||
- } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
|
||||
- break;
|
||||
- } while (pgdp++, addr = next, addr != end);
|
||||
- local_irq_restore(flags);
|
||||
+ if (gup_fast_permitted(start, nr_pages, write)) {
|
||||
+ local_irq_save(flags);
|
||||
+ gup_pgd_range(addr, end, write, pages, &nr);
|
||||
+ local_irq_restore(flags);
|
||||
+ }
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
-#ifndef gup_fast_permitted
|
||||
-/*
|
||||
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
|
||||
- * we need to fall back to the slow version:
|
||||
- */
|
||||
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
|
||||
-{
|
||||
- unsigned long len, end;
|
||||
-
|
||||
- len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
- end = start + len;
|
||||
- return end >= start;
|
||||
-}
|
||||
-#endif
|
||||
-
|
||||
/**
|
||||
* get_user_pages_fast() - pin user pages in memory
|
||||
* @start: starting user address
|
||||
@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
|
||||
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
+ unsigned long addr, len, end;
|
||||
int nr = 0, ret = 0;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
+ addr = start;
|
||||
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
+ end = start + len;
|
||||
+
|
||||
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
|
||||
+ (void __user *)start, len)))
|
||||
+ return 0;
|
||||
|
||||
if (gup_fast_permitted(start, nr_pages, write)) {
|
||||
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
|
||||
+ local_irq_disable();
|
||||
+ gup_pgd_range(addr, end, write, pages, &nr);
|
||||
+ local_irq_enable();
|
||||
ret = nr;
|
||||
}
|
||||
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,149 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Uros Bizjak <ubizjak@gmail.com>
|
||||
Date: Wed, 6 Sep 2017 17:18:08 +0200
|
||||
Subject: [PATCH] x86/asm: Remove unnecessary \n\t in front of CC_SET() from
|
||||
asm templates
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
There is no need for \n\t in front of CC_SET(), as the macro already includes these two.
|
||||
|
||||
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/20170906151808.5634-1-ubizjak@gmail.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(backported from commit 3c52b5c64326d9dcfee4e10611c53ec1b1b20675)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 1c3f29ec5586e3aecfde2c6f83b8786e1aecd9ac)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/include/asm/archrandom.h | 8 ++++----
|
||||
arch/x86/include/asm/bitops.h | 10 +++++-----
|
||||
arch/x86/include/asm/percpu.h | 2 +-
|
||||
arch/x86/include/asm/rmwcc.h | 2 +-
|
||||
4 files changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
|
||||
index 5b0579abb398..3ac991d81e74 100644
|
||||
--- a/arch/x86/include/asm/archrandom.h
|
||||
+++ b/arch/x86/include/asm/archrandom.h
|
||||
@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
|
||||
bool ok;
|
||||
unsigned int retry = RDRAND_RETRY_LOOPS;
|
||||
do {
|
||||
- asm volatile(RDRAND_LONG "\n\t"
|
||||
+ asm volatile(RDRAND_LONG
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (ok), "=a" (*v));
|
||||
if (ok)
|
||||
@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
|
||||
bool ok;
|
||||
unsigned int retry = RDRAND_RETRY_LOOPS;
|
||||
do {
|
||||
- asm volatile(RDRAND_INT "\n\t"
|
||||
+ asm volatile(RDRAND_INT
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (ok), "=a" (*v));
|
||||
if (ok)
|
||||
@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
|
||||
static inline bool rdseed_long(unsigned long *v)
|
||||
{
|
||||
bool ok;
|
||||
- asm volatile(RDSEED_LONG "\n\t"
|
||||
+ asm volatile(RDSEED_LONG
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (ok), "=a" (*v));
|
||||
return ok;
|
||||
@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
|
||||
static inline bool rdseed_int(unsigned int *v)
|
||||
{
|
||||
bool ok;
|
||||
- asm volatile(RDSEED_INT "\n\t"
|
||||
+ asm volatile(RDSEED_INT
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (ok), "=a" (*v));
|
||||
return ok;
|
||||
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
|
||||
index 854022772c5b..8cee8db6dffb 100644
|
||||
--- a/arch/x86/include/asm/bitops.h
|
||||
+++ b/arch/x86/include/asm/bitops.h
|
||||
@@ -142,7 +142,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
bool negative;
|
||||
- asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
|
||||
+ asm volatile(LOCK_PREFIX "andb %2,%1"
|
||||
CC_SET(s)
|
||||
: CC_OUT(s) (negative), ADDR
|
||||
: "ir" ((char) ~(1 << nr)) : "memory");
|
||||
@@ -245,7 +245,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
- asm("bts %2,%1\n\t"
|
||||
+ asm("bts %2,%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit), ADDR
|
||||
: "Ir" (nr));
|
||||
@@ -285,7 +285,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
- asm volatile("btr %2,%1\n\t"
|
||||
+ asm volatile("btr %2,%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit), ADDR
|
||||
: "Ir" (nr));
|
||||
@@ -297,7 +297,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
- asm volatile("btc %2,%1\n\t"
|
||||
+ asm volatile("btc %2,%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit), ADDR
|
||||
: "Ir" (nr) : "memory");
|
||||
@@ -328,7 +328,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
- asm volatile("bt %2,%1\n\t"
|
||||
+ asm volatile("bt %2,%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit)
|
||||
: "m" (*(unsigned long *)addr), "Ir" (nr));
|
||||
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
|
||||
index 9fa03604b2b3..b21a475fd7ed 100644
|
||||
--- a/arch/x86/include/asm/percpu.h
|
||||
+++ b/arch/x86/include/asm/percpu.h
|
||||
@@ -525,7 +525,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
- asm volatile("bt "__percpu_arg(2)",%1\n\t"
|
||||
+ asm volatile("bt "__percpu_arg(2)",%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit)
|
||||
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
|
||||
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
|
||||
index 661dd305694a..dd7ba5aa8dca 100644
|
||||
--- a/arch/x86/include/asm/rmwcc.h
|
||||
+++ b/arch/x86/include/asm/rmwcc.h
|
||||
@@ -28,7 +28,7 @@ cc_label: \
|
||||
#define __GEN_RMWcc(fullop, var, cc, ...) \
|
||||
do { \
|
||||
bool c; \
|
||||
- asm volatile (fullop ";" CC_SET(cc) \
|
||||
+ asm volatile (fullop CC_SET(cc) \
|
||||
: "+m" (var), CC_OUT(cc) (c) \
|
||||
: __VA_ARGS__ : "memory"); \
|
||||
return c; \
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,58 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Mon, 18 Sep 2017 21:43:30 -0500
|
||||
Subject: [PATCH] objtool: Don't report end of section error after an empty
|
||||
unwind hint
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
If asm code specifies an UNWIND_HINT_EMPTY hint, don't warn if the
|
||||
section ends unexpectedly. This can happen with the xen-head.S code
|
||||
because the hypercall_page is "text" but it's all zeros.
|
||||
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
||||
Cc: Jiri Slaby <jslaby@suse.cz>
|
||||
Cc: Juergen Gross <jgross@suse.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/ddafe199dd8797e40e3c2777373347eba1d65572.1505764066.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 00d96180dc38ef872ac471c2d3e14b067cbd895d)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 9d22f903bba24f2ac86de8a81dc1788f9957aca8)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
tools/objtool/check.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
|
||||
index 368275de5f23..0a86fd0ac082 100644
|
||||
--- a/tools/objtool/check.c
|
||||
+++ b/tools/objtool/check.c
|
||||
@@ -1652,11 +1652,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
|
||||
if (insn->dead_end)
|
||||
return 0;
|
||||
|
||||
- insn = next_insn;
|
||||
- if (!insn) {
|
||||
+ if (!next_insn) {
|
||||
+ if (state.cfa.base == CFI_UNDEFINED)
|
||||
+ return 0;
|
||||
WARN("%s: unexpected end of section", sec->name);
|
||||
return 1;
|
||||
}
|
||||
+
|
||||
+ insn = next_insn;
|
||||
}
|
||||
|
||||
return 0;
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,54 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Mon, 18 Sep 2017 21:43:31 -0500
|
||||
Subject: [PATCH] x86/head: Remove confusing comment
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
This comment is actively wrong and confusing. It refers to the
|
||||
registers' stack offsets after the pt_regs has been constructed on the
|
||||
stack, but this code is *before* that.
|
||||
|
||||
At this point the stack just has the standard iret frame, for which no
|
||||
comment should be needed.
|
||||
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
||||
Cc: Jiri Slaby <jslaby@suse.cz>
|
||||
Cc: Juergen Gross <jgross@suse.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/a3c267b770fc56c9b86df9c11c552848248aace2.1505764066.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 17270717e80de33a884ad328fea5f407d87f6d6a)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 49187e0108184688304260a75d29b789f36f3a2b)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/kernel/head_64.S | 4 ----
|
||||
1 file changed, 4 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
|
||||
index 6225550883df..627c798b2f15 100644
|
||||
--- a/arch/x86/kernel/head_64.S
|
||||
+++ b/arch/x86/kernel/head_64.S
|
||||
@@ -254,10 +254,6 @@ bad_address:
|
||||
|
||||
__INIT
|
||||
ENTRY(early_idt_handler_array)
|
||||
- # 104(%rsp) %rflags
|
||||
- # 96(%rsp) %cs
|
||||
- # 88(%rsp) %rip
|
||||
- # 80(%rsp) error code
|
||||
i = 0
|
||||
.rept NUM_EXCEPTION_VECTORS
|
||||
.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,48 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Mon, 18 Sep 2017 21:43:32 -0500
|
||||
Subject: [PATCH] x86/head: Remove unused 'bad_address' code
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
It's no longer possible for this code to be executed, so remove it.
|
||||
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
||||
Cc: Jiri Slaby <jslaby@suse.cz>
|
||||
Cc: Juergen Gross <jgross@suse.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/32a46fe92d2083700599b36872b26e7dfd7b7965.1505764066.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit a8b88e84d124bc92c4808e72b8b8c0e0bb538630)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit d790ff35a3a49ef0942a3484f024551433fd2ddf)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/kernel/head_64.S | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
|
||||
index 627c798b2f15..37d9905d38d6 100644
|
||||
--- a/arch/x86/kernel/head_64.S
|
||||
+++ b/arch/x86/kernel/head_64.S
|
||||
@@ -249,9 +249,6 @@ ENDPROC(start_cpu0)
|
||||
.quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
|
||||
__FINITDATA
|
||||
|
||||
-bad_address:
|
||||
- jmp bad_address
|
||||
-
|
||||
__INIT
|
||||
ENTRY(early_idt_handler_array)
|
||||
i = 0
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,66 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Mon, 18 Sep 2017 21:43:33 -0500
|
||||
Subject: [PATCH] x86/head: Fix head ELF function annotations
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
These functions aren't callable C-type functions, so don't annotate them
|
||||
as such.
|
||||
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
||||
Cc: Jiri Slaby <jslaby@suse.cz>
|
||||
Cc: Juergen Gross <jgross@suse.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/36eb182738c28514f8bf95e403d89b6413a88883.1505764066.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 015a2ea5478680fc5216d56b7ff306f2a74efaf9)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 707517a56928fed1c03eefdb4e00fa57dfddc4fd)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/kernel/head_64.S | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
|
||||
index 37d9905d38d6..45b18b1a6417 100644
|
||||
--- a/arch/x86/kernel/head_64.S
|
||||
+++ b/arch/x86/kernel/head_64.S
|
||||
@@ -218,7 +218,7 @@ ENTRY(secondary_startup_64)
|
||||
pushq %rax # target address in negative space
|
||||
lretq
|
||||
.Lafter_lret:
|
||||
-ENDPROC(secondary_startup_64)
|
||||
+END(secondary_startup_64)
|
||||
|
||||
#include "verify_cpu.S"
|
||||
|
||||
@@ -261,7 +261,7 @@ ENTRY(early_idt_handler_array)
|
||||
i = i + 1
|
||||
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
|
||||
.endr
|
||||
-ENDPROC(early_idt_handler_array)
|
||||
+END(early_idt_handler_array)
|
||||
|
||||
early_idt_handler_common:
|
||||
/*
|
||||
@@ -304,7 +304,7 @@ early_idt_handler_common:
|
||||
20:
|
||||
decl early_recursion_flag(%rip)
|
||||
jmp restore_regs_and_iret
|
||||
-ENDPROC(early_idt_handler_common)
|
||||
+END(early_idt_handler_common)
|
||||
|
||||
__INITDATA
|
||||
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,52 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Mon, 18 Sep 2017 21:43:34 -0500
|
||||
Subject: [PATCH] x86/boot: Annotate verify_cpu() as a callable function
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
verify_cpu() is a callable function. Annotate it as such.
|
||||
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
||||
Cc: Jiri Slaby <jslaby@suse.cz>
|
||||
Cc: Juergen Gross <jgross@suse.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/293024b8a080832075312f38c07ccc970fc70292.1505764066.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit e93db75a0054b23a874a12c63376753544f3fe9e)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 48a432c46026f864e194cdf9a8133e7c9109274e)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/kernel/verify_cpu.S | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
|
||||
index 014ea59aa153..3d3c2f71f617 100644
|
||||
--- a/arch/x86/kernel/verify_cpu.S
|
||||
+++ b/arch/x86/kernel/verify_cpu.S
|
||||
@@ -33,7 +33,7 @@
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
-verify_cpu:
|
||||
+ENTRY(verify_cpu)
|
||||
pushf # Save caller passed flags
|
||||
push $0 # Kill any dangerous flags
|
||||
popf
|
||||
@@ -139,3 +139,4 @@ verify_cpu:
|
||||
popf # Restore caller passed flags
|
||||
xorl %eax, %eax
|
||||
ret
|
||||
+ENDPROC(verify_cpu)
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,56 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Mon, 18 Sep 2017 21:43:35 -0500
|
||||
Subject: [PATCH] x86/xen: Fix xen head ELF annotations
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
Mark the ends of the startup_xen and hypercall_page code sections.
|
||||
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
||||
Cc: Jiri Slaby <jslaby@suse.cz>
|
||||
Cc: Juergen Gross <jgross@suse.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/3a80a394d30af43d9cefa1a29628c45ed8420c97.1505764066.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 2582d3df95c76d3b686453baf90b64d57e87d1e8)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit b9410861f1436c1e38958a9b85009ad252aad9f5)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/xen/xen-head.S | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
|
||||
index 72a8e6adebe6..2f0cff2cc265 100644
|
||||
--- a/arch/x86/xen/xen-head.S
|
||||
+++ b/arch/x86/xen/xen-head.S
|
||||
@@ -33,7 +33,7 @@ ENTRY(startup_xen)
|
||||
mov $init_thread_union+THREAD_SIZE, %_ASM_SP
|
||||
|
||||
jmp xen_start_kernel
|
||||
-
|
||||
+END(startup_xen)
|
||||
__FINIT
|
||||
#endif
|
||||
|
||||
@@ -47,7 +47,7 @@ ENTRY(hypercall_page)
|
||||
.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
|
||||
#include <asm/xen-hypercalls.h>
|
||||
#undef HYPERCALL
|
||||
-
|
||||
+END(hypercall_page)
|
||||
.popsection
|
||||
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,70 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Mon, 18 Sep 2017 21:43:36 -0500
|
||||
Subject: [PATCH] x86/xen: Add unwind hint annotations
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
Add unwind hint annotations to the xen head code so the ORC unwinder can
|
||||
read head_64.o.
|
||||
|
||||
hypercall_page needs empty annotations at 32-byte intervals to match the
|
||||
'xen_hypercall_*' ELF functions at those locations.
|
||||
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
||||
Cc: Jiri Slaby <jslaby@suse.cz>
|
||||
Cc: Juergen Gross <jgross@suse.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/70ed2eb516fe9266be766d953f93c2571bca88cc.1505764066.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit abbe1cac6214d81d2f4e149aba64a8760703144e)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 9f099a90cb39eaff9b3187e8a6d8151c8af53db1)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/xen/xen-head.S | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
|
||||
index 2f0cff2cc265..ad189ab2c329 100644
|
||||
--- a/arch/x86/xen/xen-head.S
|
||||
+++ b/arch/x86/xen/xen-head.S
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <asm/boot.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/page_types.h>
|
||||
+#include <asm/unwind_hints.h>
|
||||
|
||||
#include <xen/interface/elfnote.h>
|
||||
#include <xen/interface/features.h>
|
||||
@@ -19,6 +20,7 @@
|
||||
#ifdef CONFIG_XEN_PV
|
||||
__INIT
|
||||
ENTRY(startup_xen)
|
||||
+ UNWIND_HINT_EMPTY
|
||||
cld
|
||||
|
||||
/* Clear .bss */
|
||||
@@ -40,7 +42,10 @@ END(startup_xen)
|
||||
.pushsection .text
|
||||
.balign PAGE_SIZE
|
||||
ENTRY(hypercall_page)
|
||||
- .skip PAGE_SIZE
|
||||
+ .rept (PAGE_SIZE / 32)
|
||||
+ UNWIND_HINT_EMPTY
|
||||
+ .skip 32
|
||||
+ .endr
|
||||
|
||||
#define HYPERCALL(n) \
|
||||
.equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,134 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Mon, 18 Sep 2017 21:43:37 -0500
|
||||
Subject: [PATCH] x86/head: Add unwind hint annotations
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
Jiri Slaby reported an ORC issue when unwinding from an idle task. The
|
||||
stack was:
|
||||
|
||||
ffffffff811083c2 do_idle+0x142/0x1e0
|
||||
ffffffff8110861d cpu_startup_entry+0x5d/0x60
|
||||
ffffffff82715f58 start_kernel+0x3ff/0x407
|
||||
ffffffff827153e8 x86_64_start_kernel+0x14e/0x15d
|
||||
ffffffff810001bf secondary_startup_64+0x9f/0xa0
|
||||
|
||||
The ORC unwinder errored out at secondary_startup_64 because the head
|
||||
code isn't annotated yet so there wasn't a corresponding ORC entry.
|
||||
|
||||
Fix that and any other head-related unwinding issues by adding unwind
|
||||
hints to the head code.
|
||||
|
||||
Reported-by: Jiri Slaby <jslaby@suse.cz>
|
||||
Tested-by: Jiri Slaby <jslaby@suse.cz>
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
||||
Cc: Juergen Gross <jgross@suse.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/78ef000a2f68f545d6eef44ee912edceaad82ccf.1505764066.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 2704fbb672d0d9a19414907fda7949283dcef6a1)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit b63a868e404e64172afefea553c6a40963a151db)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/kernel/Makefile | 1 -
|
||||
arch/x86/kernel/head_64.S | 14 ++++++++++++--
|
||||
2 files changed, 12 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
|
||||
index 287eac7d207f..e2315aecc441 100644
|
||||
--- a/arch/x86/kernel/Makefile
|
||||
+++ b/arch/x86/kernel/Makefile
|
||||
@@ -26,7 +26,6 @@ KASAN_SANITIZE_dumpstack.o := n
|
||||
KASAN_SANITIZE_dumpstack_$(BITS).o := n
|
||||
KASAN_SANITIZE_stacktrace.o := n
|
||||
|
||||
-OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_test_nx.o := y
|
||||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
|
||||
index 45b18b1a6417..d081bc7a027d 100644
|
||||
--- a/arch/x86/kernel/head_64.S
|
||||
+++ b/arch/x86/kernel/head_64.S
|
||||
@@ -49,6 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
|
||||
.code64
|
||||
.globl startup_64
|
||||
startup_64:
|
||||
+ UNWIND_HINT_EMPTY
|
||||
/*
|
||||
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
|
||||
* and someone has loaded an identity mapped page table
|
||||
@@ -81,6 +82,7 @@ startup_64:
|
||||
movq $(early_top_pgt - __START_KERNEL_map), %rax
|
||||
jmp 1f
|
||||
ENTRY(secondary_startup_64)
|
||||
+ UNWIND_HINT_EMPTY
|
||||
/*
|
||||
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
|
||||
* and someone has loaded a mapped page table.
|
||||
@@ -116,6 +118,7 @@ ENTRY(secondary_startup_64)
|
||||
movq $1f, %rax
|
||||
jmp *%rax
|
||||
1:
|
||||
+ UNWIND_HINT_EMPTY
|
||||
|
||||
/* Check if nx is implemented */
|
||||
movl $0x80000001, %eax
|
||||
@@ -230,6 +233,7 @@ END(secondary_startup_64)
|
||||
*/
|
||||
ENTRY(start_cpu0)
|
||||
movq initial_stack(%rip), %rsp
|
||||
+ UNWIND_HINT_EMPTY
|
||||
jmp .Ljump_to_C_code
|
||||
ENDPROC(start_cpu0)
|
||||
#endif
|
||||
@@ -254,13 +258,18 @@ ENTRY(early_idt_handler_array)
|
||||
i = 0
|
||||
.rept NUM_EXCEPTION_VECTORS
|
||||
.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
|
||||
- pushq $0 # Dummy error code, to make stack frame uniform
|
||||
+ UNWIND_HINT_IRET_REGS
|
||||
+ pushq $0 # Dummy error code, to make stack frame uniform
|
||||
+ .else
|
||||
+ UNWIND_HINT_IRET_REGS offset=8
|
||||
.endif
|
||||
pushq $i # 72(%rsp) Vector number
|
||||
jmp early_idt_handler_common
|
||||
+ UNWIND_HINT_IRET_REGS
|
||||
i = i + 1
|
||||
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
|
||||
.endr
|
||||
+ UNWIND_HINT_IRET_REGS offset=16
|
||||
END(early_idt_handler_array)
|
||||
|
||||
early_idt_handler_common:
|
||||
@@ -289,6 +298,7 @@ early_idt_handler_common:
|
||||
pushq %r13 /* pt_regs->r13 */
|
||||
pushq %r14 /* pt_regs->r14 */
|
||||
pushq %r15 /* pt_regs->r15 */
|
||||
+ UNWIND_HINT_REGS
|
||||
|
||||
cmpq $14,%rsi /* Page fault? */
|
||||
jnz 10f
|
||||
@@ -411,7 +421,7 @@ ENTRY(phys_base)
|
||||
EXPORT_SYMBOL(phys_base)
|
||||
|
||||
#include "../../x86/xen/xen-head.S"
|
||||
-
|
||||
+
|
||||
__PAGE_ALIGNED_BSS
|
||||
NEXT_PAGE(empty_zero_page)
|
||||
.skip PAGE_SIZE
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,43 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Jan Beulich <JBeulich@suse.com>
|
||||
Date: Mon, 25 Sep 2017 02:06:19 -0600
|
||||
Subject: [PATCH] ACPI / APEI: adjust a local variable type in
|
||||
ghes_ioremap_pfn_irq()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
Match up with what 7edda0886b ("acpi: apei: handle SEA notification
|
||||
type for ARMv8") did for ghes_ioremap_pfn_nmi().
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Reviewed-by: Borislav Petkov <bp@suse.de>
|
||||
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
(cherry picked from commit 095f613c6b386a1704b73a549e9ba66c1d5381ae)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 0a5c092882b0ead111dc3a6bbaa870665b54d796)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
drivers/acpi/apei/ghes.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
|
||||
index d661d452b238..3628078ee351 100644
|
||||
--- a/drivers/acpi/apei/ghes.c
|
||||
+++ b/drivers/acpi/apei/ghes.c
|
||||
@@ -174,7 +174,8 @@ static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
|
||||
|
||||
static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
|
||||
{
|
||||
- unsigned long vaddr, paddr;
|
||||
+ unsigned long vaddr;
|
||||
+ phys_addr_t paddr;
|
||||
pgprot_t prot;
|
||||
|
||||
vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,44 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ingo Molnar <mingo@kernel.org>
|
||||
Date: Thu, 12 Oct 2017 09:24:30 +0200
|
||||
Subject: [PATCH] x86/unwinder: Make CONFIG_UNWINDER_ORC=y the default in the
|
||||
64-bit defconfig
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
Increase testing coverage by turning on the primary x86 unwinder for
|
||||
the 64-bit defconfig.
|
||||
|
||||
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: linux-kernel@vger.kernel.org
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 1e4078f0bba46ad61b69548abe6a6faf63b89380)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit ebcba768c005dce435721f6c998e3afdf5534666)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/configs/x86_64_defconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
|
||||
index 4a4b16e56d35..eb65c248708d 100644
|
||||
--- a/arch/x86/configs/x86_64_defconfig
|
||||
+++ b/arch/x86/configs/x86_64_defconfig
|
||||
@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
|
||||
# CONFIG_DEBUG_RODATA_TEST is not set
|
||||
CONFIG_DEBUG_BOOT_PARAMS=y
|
||||
CONFIG_OPTIMIZE_INLINING=y
|
||||
+CONFIG_ORC_UNWINDER=y
|
||||
CONFIG_SECURITY=y
|
||||
CONFIG_SECURITY_NETWORK=y
|
||||
CONFIG_SECURITY_SELINUX=y
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,66 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
|
||||
Date: Thu, 12 Oct 2017 18:06:19 -0400
|
||||
Subject: [PATCH] x86/fpu/debug: Remove unused 'x86_fpu_state' and
|
||||
'x86_fpu_deactivate_state' tracepoints
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
Commit:
|
||||
|
||||
d1898b733619 ("x86/fpu: Add tracepoints to dump FPU state at key points")
|
||||
|
||||
... added the 'x86_fpu_state' and 'x86_fpu_deactivate_state' trace points,
|
||||
but never used them. Today they are still not used. As they take up
|
||||
and waste memory, remove them.
|
||||
|
||||
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
|
||||
Cc: Dave Hansen <dave.hansen@linux.intel.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/20171012180619.670b68b6@gandalf.local.home
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 127a1bea40f7f2a36bc7207ea4d51bb6b4e936fa)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit c7c367ddb6ffb6af2cfee287960e97c4aefc6548)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/include/asm/trace/fpu.h | 10 ----------
|
||||
1 file changed, 10 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
|
||||
index 342e59789fcd..fed7d9ecae60 100644
|
||||
--- a/arch/x86/include/asm/trace/fpu.h
|
||||
+++ b/arch/x86/include/asm/trace/fpu.h
|
||||
@@ -36,11 +36,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
|
||||
)
|
||||
);
|
||||
|
||||
-DEFINE_EVENT(x86_fpu, x86_fpu_state,
|
||||
- TP_PROTO(struct fpu *fpu),
|
||||
- TP_ARGS(fpu)
|
||||
-);
|
||||
-
|
||||
DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
|
||||
TP_PROTO(struct fpu *fpu),
|
||||
TP_ARGS(fpu)
|
||||
@@ -76,11 +71,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
|
||||
TP_ARGS(fpu)
|
||||
);
|
||||
|
||||
-DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
|
||||
- TP_PROTO(struct fpu *fpu),
|
||||
- TP_ARGS(fpu)
|
||||
-);
|
||||
-
|
||||
DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
|
||||
TP_PROTO(struct fpu *fpu),
|
||||
TP_ARGS(fpu)
|
||||
--
|
||||
2.14.2
|
||||
|
@ -1,273 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Fri, 13 Oct 2017 15:02:00 -0500
|
||||
Subject: [PATCH] x86/unwind: Rename unwinder config options to
|
||||
'CONFIG_UNWINDER_*'
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
Rename the unwinder config options from:
|
||||
|
||||
CONFIG_ORC_UNWINDER
|
||||
CONFIG_FRAME_POINTER_UNWINDER
|
||||
CONFIG_GUESS_UNWINDER
|
||||
|
||||
to:
|
||||
|
||||
CONFIG_UNWINDER_ORC
|
||||
CONFIG_UNWINDER_FRAME_POINTER
|
||||
CONFIG_UNWINDER_GUESS
|
||||
|
||||
... in order to give them a more logical config namespace.
|
||||
|
||||
Suggested-by: Ingo Molnar <mingo@kernel.org>
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/73972fc7e2762e91912c6b9584582703d6f1b8cc.1507924831.git.jpoimboe@redhat.com
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit 11af847446ed0d131cf24d16a7ef3d5ea7a49554)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 27ab2a240a797b073ce63385b1d5db06e44fc3ae)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
Documentation/x86/orc-unwinder.txt | 2 +-
|
||||
Makefile | 4 ++--
|
||||
arch/x86/kernel/Makefile | 6 +++---
|
||||
scripts/Makefile.build | 2 +-
|
||||
arch/x86/include/asm/module.h | 2 +-
|
||||
arch/x86/include/asm/unwind.h | 8 ++++----
|
||||
include/asm-generic/vmlinux.lds.h | 2 +-
|
||||
arch/x86/Kconfig | 2 +-
|
||||
arch/x86/Kconfig.debug | 10 +++++-----
|
||||
arch/x86/configs/tiny.config | 4 ++--
|
||||
arch/x86/configs/x86_64_defconfig | 2 +-
|
||||
lib/Kconfig.debug | 2 +-
|
||||
12 files changed, 23 insertions(+), 23 deletions(-)
|
||||
|
||||
diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
|
||||
index af0c9a4c65a6..cd4b29be29af 100644
|
||||
--- a/Documentation/x86/orc-unwinder.txt
|
||||
+++ b/Documentation/x86/orc-unwinder.txt
|
||||
@@ -4,7 +4,7 @@ ORC unwinder
|
||||
Overview
|
||||
--------
|
||||
|
||||
-The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
|
||||
+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
similar in concept to a DWARF unwinder. The difference is that the
format of the ORC data is much simpler than DWARF, which in turn allows
the ORC unwinder to be much simpler and faster.
diff --git a/Makefile b/Makefile
index 490ce18685ea..b740e3dc9ff8 100644
--- a/Makefile
+++ b/Makefile
@@ -965,8 +965,8 @@ ifdef CONFIG_STACK_VALIDATION
ifeq ($(has_libelf),1)
objtool_target := tools/objtool FORCE
else
- ifdef CONFIG_ORC_UNWINDER
- $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ ifdef CONFIG_UNWINDER_ORC
+ $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
else
$(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e2315aecc441..5bf0d5a473b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -125,9 +125,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o

-obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
-obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
-obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
+obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
+obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
+obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o

###
# 64 bit specific files
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index ab2c8ef43cdb..436005392047 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -258,7 +258,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)

__objtool_obj := $(objtree)/tools/objtool/objtool

-objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
+objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)

ifndef CONFIG_FRAME_POINTER
objtool_args += --no-fp
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 9eb7c718aaf8..9f05a1002aa9 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,7 +5,7 @@
#include <asm/orc_types.h>

struct mod_arch_specific {
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9f793e2df7a..35d67dc7b69f 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,11 +12,11 @@ struct unwind_state {
struct task_struct *task;
int graph_idx;
bool error;
-#if defined(CONFIG_ORC_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
struct pt_regs *regs;
-#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
+#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
struct pt_regs *regs;
@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
__unwind_start(state, task, regs, first_frame);
}

-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
if (unwind_done(state))
@@ -65,7 +65,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
}
#endif

-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
void unwind_init(void);
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
void *orc, size_t orc_size);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 9fdb54a95976..e71e42432360 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -686,7 +686,7 @@
#define BUG_TABLE
#endif

-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
#define ORC_UNWIND_TABLE \
. = ALIGN(4); \
.orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3a0b8cb57caf..bf9f03740c30 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -168,7 +168,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c441b5d65ec8..5435a943f894 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -358,13 +358,13 @@ config PUNIT_ATOM_DEBUG

choice
prompt "Choose kernel unwinder"
- default FRAME_POINTER_UNWINDER
+ default UNWINDER_FRAME_POINTER
---help---
This determines which method will be used for unwinding kernel stack
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
livepatch, lockdep, and more.

-config FRAME_POINTER_UNWINDER
+config UNWINDER_FRAME_POINTER
bool "Frame pointer unwinder"
select FRAME_POINTER
---help---
@@ -379,7 +379,7 @@ config FRAME_POINTER_UNWINDER
consistency model, as this is currently the only way to get a
reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).

-config ORC_UNWINDER
+config UNWINDER_ORC
bool "ORC unwinder"
depends on X86_64
select STACK_VALIDATION
@@ -396,7 +396,7 @@ config ORC_UNWINDER
Enabling this option will increase the kernel's runtime memory usage
by roughly 2-4MB, depending on your kernel config.

-config GUESS_UNWINDER
+config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
---help---
@@ -411,7 +411,7 @@ config GUESS_UNWINDER
endchoice

config FRAME_POINTER
- depends on !ORC_UNWINDER && !GUESS_UNWINDER
+ depends on !UNWINDER_ORC && !UNWINDER_GUESS
bool

endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 550cd5012b73..66c9e2aab16c 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,5 +1,5 @@
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
-CONFIG_GUESS_UNWINDER=y
-# CONFIG_FRAME_POINTER_UNWINDER is not set
+CONFIG_UNWINDER_GUESS=y
+# CONFIG_UNWINDER_FRAME_POINTER is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index eb65c248708d..e32fc1f274d8 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -299,7 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
-CONFIG_ORC_UNWINDER=y
+CONFIG_UNWINDER_ORC=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b4d1b3880b0..4f6ca5f60f7e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -375,7 +375,7 @@ config STACK_VALIDATION
that runtime stack traces are more reliable.

This is also a prerequisite for generation of ORC unwind data, which
- is needed for CONFIG_ORC_UNWINDER.
+ is needed for CONFIG_UNWINDER_ORC.

For more information, see
tools/objtool/Documentation/stack-validation.txt.
--
2.14.2

@ -1,90 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 13 Oct 2017 15:02:01 -0500
Subject: [PATCH] x86/unwind: Make CONFIG_UNWINDER_ORC=y the default in kconfig
for 64-bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The ORC unwinder has been stable in testing so far. Give it much wider
testing by making it the default in kconfig for x86_64. It's not yet
supported for 32-bit, so leave frame pointers as the default there.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/9b1237bbe7244ed9cdf8db2dcb1253e37e1c341e.1507924831.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit fc72ae40e30327aa24eb88a24b9c7058f938bd36)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit aff8d5169f46ae6ac0eb26a5ba745aaf9afa0704)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/Kconfig.debug | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 5435a943f894..7d88e9878a75 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -358,27 +358,13 @@ config PUNIT_ATOM_DEBUG

choice
prompt "Choose kernel unwinder"
- default UNWINDER_FRAME_POINTER
+ default UNWINDER_ORC if X86_64
+ default UNWINDER_FRAME_POINTER if X86_32
---help---
This determines which method will be used for unwinding kernel stack
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
livepatch, lockdep, and more.

-config UNWINDER_FRAME_POINTER
- bool "Frame pointer unwinder"
- select FRAME_POINTER
- ---help---
- This option enables the frame pointer unwinder for unwinding kernel
- stack traces.
-
- The unwinder itself is fast and it uses less RAM than the ORC
- unwinder, but the kernel text size will grow by ~3% and the kernel's
- overall performance will degrade by roughly 5-10%.
-
- This option is recommended if you want to use the livepatch
- consistency model, as this is currently the only way to get a
- reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
config UNWINDER_ORC
bool "ORC unwinder"
depends on X86_64
@@ -396,6 +382,21 @@ config UNWINDER_ORC
Enabling this option will increase the kernel's runtime memory usage
by roughly 2-4MB, depending on your kernel config.

+config UNWINDER_FRAME_POINTER
+ bool "Frame pointer unwinder"
+ select FRAME_POINTER
+ ---help---
+ This option enables the frame pointer unwinder for unwinding kernel
+ stack traces.
+
+ The unwinder itself is fast and it uses less RAM than the ORC
+ unwinder, but the kernel text size will grow by ~3% and the kernel's
+ overall performance will degrade by roughly 5-10%.
+
+ This option is recommended if you want to use the livepatch
+ consistency model, as this is currently the only way to get a
+ reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
--
2.14.2

@ -1,69 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:41 -0700
Subject: [PATCH] bitops: Add clear/set_bit32() to linux/bitops.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add two simple wrappers around set_bit/clear_bit() that accept
the common case of an u32 array. This avoids writing
casts in all callers.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-2-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit cbe96375025e14fc76f9ed42ee5225120d7210f8)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 06d31c11519ca0e8f9b7cab857f442ef44dfc1b2)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
include/linux/bitops.h | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a83c822c35c2..eb257a96db6d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -226,6 +226,32 @@ static inline unsigned long __ffs64(u64 word)
return __ffs((unsigned long)word);
}

+/*
+ * clear_bit32 - Clear a bit in memory for u32 array
+ * @nr: Bit to clear
+ * @addr: u32 * address of bitmap
+ *
+ * Same as clear_bit, but avoids needing casts for u32 arrays.
+ */
+
+static __always_inline void clear_bit32(long nr, volatile u32 *addr)
+{
+ clear_bit(nr, (volatile unsigned long *)addr);
+}
+
+/*
+ * set_bit32 - Set a bit in memory for u32 array
+ * @nr: Bit to set
+ * @addr: u32 * address of bitmap
+ *
+ * Same as set_bit, but avoids needing casts for u32 arrays.
+ */
+
+static __always_inline void set_bit32(long nr, volatile u32 *addr)
+{
+ set_bit(nr, (volatile unsigned long *)addr);
+}
+
#ifdef __KERNEL__

#ifndef set_mask_bits
--
2.14.2

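The wrappers only hide a cast, but that is exactly what every caller used to repeat. A minimal sketch of a call site (the 'caps' array is invented for illustration, standing in for a u32-based bitmap such as x86_capability):

#include <linux/bitops.h>

static u32 caps[4];	/* hypothetical u32 bitmap */

static void caps_example(void)
{
	/* before this patch: set_bit(5, (unsigned long *)caps) */
	set_bit32(5, caps);
	clear_bit32(5, caps);
}
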
@ -1,221 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:42 -0700
Subject: [PATCH] x86/cpuid: Add generic table for CPUID dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Some CPUID features depend on other features. Currently it's
possible to clear dependent features, but not clear the base features,
which can cause various interesting problems.

This patch implements a generic table to describe dependencies
between CPUID features, to be used by all code that clears
CPUID.

Some subsystems (like XSAVE) had their own implementation of this,
but it's better to do it all in a single place for everyone.

Then clear_cpu_cap and setup_clear_cpu_cap always look up
this table and clear all dependencies too.

This is intended to be a practical table: only for features
that make sense to clear. If someone for example clears FPU,
or other features that are essentially part of the required
base feature set, not much is going to work. Handling
that is right now out of scope. We're only handling
features which can be usefully cleared.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jonathan McDowell <noodles@earth.li>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-3-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 0b00de857a648dafe7020878c7a27cf776f5edf4)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 35672522f2fc9a2e116ed1766f190bc08ef5582a)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/Makefile | 1 +
arch/x86/include/asm/cpufeature.h | 9 ++-
arch/x86/include/asm/cpufeatures.h | 5 ++
arch/x86/kernel/cpu/cpuid-deps.c | 113 +++++++++++++++++++++++++++++++++++++
4 files changed, 123 insertions(+), 5 deletions(-)
create mode 100644 arch/x86/kernel/cpu/cpuid-deps.c

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index e17942c131c8..de260fae1017 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -22,6 +22,7 @@ obj-y += rdrand.o
obj-y += match.o
obj-y += bugs.o
obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
+obj-y += cpuid-deps.o

obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d59c15c3defd..225fd8374fae 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -125,11 +125,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)

#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
-#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
-#define setup_clear_cpu_cap(bit) do { \
- clear_cpu_cap(&boot_cpu_data, bit); \
- set_bit(bit, (unsigned long *)cpu_caps_cleared); \
-} while (0)
+
+extern void setup_clear_cpu_cap(unsigned int bit);
+extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
+
#define setup_force_cpu_cap(bit) do { \
set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5a28e8e55e36..f4e145c4b06f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -21,6 +21,11 @@
* this feature bit is not displayed in /proc/cpuinfo at all.
*/

+/*
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c
+ */
+
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 000000000000..e48eb7313120
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,113 @@
+/* Declare dependencies between CPUIDs */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cpufeature.h>
+
+struct cpuid_dep {
+ unsigned int feature;
+ unsigned int depends;
+};
+
+/*
+ * Table of CPUID features that depend on others.
+ *
+ * This only includes dependencies that can be usefully disabled, not
+ * features part of the base set (like FPU).
+ *
+ * Note this all is not __init / __initdata because it can be
+ * called from cpu hotplug. It shouldn't do anything in this case,
+ * but it's difficult to tell that to the init reference checker.
+ */
+const static struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
+ { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
+ { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM2, X86_FEATURE_XMM },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
+ { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
+ { X86_FEATURE_AES, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ {}
+};
+
+static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
+{
+ clear_bit32(bit, c->x86_capability);
+}
+
+static inline void __setup_clear_cpu_cap(unsigned int bit)
+{
+ clear_cpu_cap(&boot_cpu_data, bit);
+ set_bit32(bit, cpu_caps_cleared);
+}
+
+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ if (!c)
+ __setup_clear_cpu_cap(feature);
+ else
+ __clear_cpu_cap(c, feature);
+}
+
+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ bool changed;
+ DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8);
+ const struct cpuid_dep *d;
+
+ clear_feature(c, feature);
+
+ /* Collect all features to disable, handling dependencies */
+ memset(disable, 0, sizeof(disable));
+ __set_bit(feature, disable);
+
+ /* Loop until we get a stable state. */
+ do {
+ changed = false;
+ for (d = cpuid_deps; d->feature; d++) {
+ if (!test_bit(d->depends, disable))
+ continue;
+ if (__test_and_set_bit(d->feature, disable))
+ continue;
+
+ changed = true;
+ clear_feature(c, d->feature);
+ }
+ } while (changed);
+}
+
+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ do_clear_cpu_cap(c, feature);
+}
+
+void setup_clear_cpu_cap(unsigned int feature)
+{
+ do_clear_cpu_cap(NULL, feature);
+}
--
2.14.2

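The dependency walk introduced above is a plain fixed-point iteration over the table. The standalone sketch below models it in userspace C with invented feature numbers (0 = XSAVE, 1 = AVX, 2 = AVX2) to show how clearing a base feature cascades to everything that depends on it:

#include <stdbool.h>
#include <stdio.h>

struct dep { int feature; int depends; };

/* Invented numbering, mirroring cpuid_deps[]: AVX2 -> AVX -> XSAVE. */
static const struct dep deps[] = { { 2, 1 }, { 1, 0 }, { -1, -1 } };

int main(void)
{
	bool disabled[3] = { [0] = true };	/* the user cleared XSAVE */
	bool changed;

	/* Loop until a stable state is reached, as do_clear_cpu_cap() does. */
	do {
		changed = false;
		for (const struct dep *d = deps; d->feature >= 0; d++) {
			if (disabled[d->depends] && !disabled[d->feature]) {
				disabled[d->feature] = true;
				changed = true;
			}
		}
	} while (changed);

	for (int i = 0; i < 3; i++)
		printf("feature %d: %s\n", i, disabled[i] ? "off" : "on");
	return 0;
}
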
@ -1,97 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:43 -0700
Subject: [PATCH] x86/fpu: Parse clearcpuid= as early XSAVE argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

With a follow-on patch we want to make clearcpuid affect the XSAVE
configuration. But xsave is currently initialized before arguments
are parsed. Move the clearcpuid= parsing into the special
early xsave argument parsing code.

Since clearcpuid= contains a = we need to keep the old __setup
around as a dummy, otherwise it would end up as an environment
variable in init's environment.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-4-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 0c2a3913d6f50503f7c59d83a6219e39508cc898)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 27deb452eb0d27c406f3817ab057201aa8767abe)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/common.c | 16 +++++++---------
arch/x86/kernel/fpu/init.c | 11 +++++++++++
2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4be7b209a3d6..ef7b1ba56363 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1293,18 +1293,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(")\n");
}

-static __init int setup_disablecpuid(char *arg)
+/*
+ * clearcpuid= was already parsed in fpu__init_parse_early_param.
+ * But we need to keep a dummy __setup around otherwise it would
+ * show up as an environment variable for init.
+ */
+static __init int setup_clearcpuid(char *arg)
{
- int bit;
-
- if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
- setup_clear_cpu_cap(bit);
- else
- return 0;
-
return 1;
}
-__setup("clearcpuid=", setup_disablecpuid);
+__setup("clearcpuid=", setup_clearcpuid);

#ifdef CONFIG_X86_64
struct desc_ptr idt_descr __ro_after_init = {
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index d5d44c452624..07f0ab877f49 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
+ char arg[32];
+ char *argptr = arg;
+ int bit;
+
if (cmdline_find_option_bool(boot_command_line, "no387"))
setup_clear_cpu_cap(X86_FEATURE_FPU);

@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)

if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+ if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
+ sizeof(arg)) &&
+ get_option(&argptr, &bit) &&
+ bit >= 0 &&
+ bit < NCAPINTS * 32)
+ setup_clear_cpu_cap(bit);
}

/*
--
2.14.2

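The value handed to clearcpuid= is a raw feature-bit index in the X86_FEATURE_* encoding, i.e. word * 32 + bit. A small sketch of that arithmetic, assuming XSAVE sits at word 4, bit 26 as in this tree's cpufeatures.h:

#include <stdio.h>

#define FEATURE_BIT(word, bit)	(32 * (word) + (bit))

int main(void)
{
	/* prints "clearcpuid=154" under the stated assumption */
	printf("clearcpuid=%d\n", FEATURE_BIT(4, 26));
	return 0;
}
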
@ -1,90 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:44 -0700
Subject: [PATCH] x86/fpu: Make XSAVE check the base CPUID features before
enabling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Before enabling XSAVE, not only check the XSAVE specific CPUID bits,
but also the base CPUID features of the respective XSAVE feature.
This allows disabling individual XSAVE states using the existing
clearcpuid= option, which can be useful for performance testing
and debugging, and also in general avoids inconsistencies.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-5-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit ccb18db2ab9d923df07e7495123fe5fb02329713)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 2efda26f9ee0eeb9919772e90ca30dbe59008dc8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/fpu/xstate.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c24ac1efb12d..3abe85b08234 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
#include <asm/fpu/xstate.h>

#include <asm/tlbflush.h>
+#include <asm/cpufeature.h>

/*
* Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
"unknown xstate feature" ,
};

+static short xsave_cpuid_features[] __initdata = {
+ X86_FEATURE_FPU,
+ X86_FEATURE_XMM,
+ X86_FEATURE_AVX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_INTEL_PT,
+ X86_FEATURE_PKU,
+};
+
/*
* Mask of xstate features supported by the CPU and the kernel:
*/
@@ -702,6 +716,7 @@ void __init fpu__init_system_xstate(void)
unsigned int eax, ebx, ecx, edx;
static int on_boot_cpu __initdata = 1;
int err;
+ int i;

WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -735,6 +750,14 @@ void __init fpu__init_system_xstate(void)
goto out_disable;
}

+ /*
+ * Clear XSAVE features that are disabled in the normal CPUID.
+ */
+ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+ if (!boot_cpu_has(xsave_cpuid_features[i]))
+ xfeatures_mask &= ~BIT(i);
+ }
+
xfeatures_mask &= fpu__get_supported_xfeatures_mask();

/* Enable xstate instructions to be able to continue with initialization: */
--
2.14.2

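The new loop drops any xstate component whose guarding CPUID feature has been cleared. A runnable userspace model of that masking step (the feature numbering and mask value are invented for the example):

#include <stdbool.h>
#include <stdio.h>

enum { FEAT_FPU, FEAT_SSE, FEAT_AVX, NFEAT };

/* Base feature guarding each xstate component, as xsave_cpuid_features does. */
static const int xsave_base_feature[] = { FEAT_FPU, FEAT_SSE, FEAT_AVX };
static const bool has_feature[NFEAT] = { true, true, false };	/* AVX cleared */

int main(void)
{
	unsigned long long xfeatures_mask = 0x7;	/* x87, SSE and AVX states */
	unsigned int i;

	for (i = 0; i < sizeof(xsave_base_feature) / sizeof(xsave_base_feature[0]); i++)
		if (!has_feature[xsave_base_feature[i]])
			xfeatures_mask &= ~(1ULL << i);

	printf("xfeatures_mask = %#llx\n", xfeatures_mask);	/* 0x3 */
	return 0;
}
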
@ -1,70 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:45 -0700
Subject: [PATCH] x86/fpu: Remove the explicit clearing of XSAVE dependent
features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Clearing a CPU feature with setup_clear_cpu_cap() clears all features
which depend on it. Expressing feature dependencies in one place is
easier to maintain than keeping functions like
fpu__xstate_clear_all_cpu_caps() up to date.

The features which depend on XSAVE have their dependency expressed in the
dependency table, so it's sufficient to clear X86_FEATURE_XSAVE.

Remove the explicit clearing of XSAVE dependent features.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-6-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 73e3a7d2a7c3be29a5a22b85026f6cfa5664267f)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit af445f9ba8bb30b47ccb5247b8f5ba28c9f2be3e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/fpu/xstate.c | 20 --------------------
1 file changed, 20 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 3abe85b08234..fd6882c42246 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -73,26 +73,6 @@ unsigned int fpu_user_xstate_size;
void fpu__xstate_clear_all_cpu_caps(void)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
- setup_clear_cpu_cap(X86_FEATURE_AVX);
- setup_clear_cpu_cap(X86_FEATURE_AVX2);
- setup_clear_cpu_cap(X86_FEATURE_AVX512F);
- setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
- setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
- setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
- setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
- setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
- setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
- setup_clear_cpu_cap(X86_FEATURE_PKU);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
}

/*
--
2.14.2

@ -1,57 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 16 Oct 2017 16:22:31 -0700
Subject: [PATCH] x86/platform/UV: Convert timers to use timer_setup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Dimitri Sivanich <sivanich@hpe.com>
Cc: Russ Anderson <rja@hpe.com>
Cc: Mike Travis <mike.travis@hpe.com>
Link: https://lkml.kernel.org/r/20171016232231.GA100493@beast

(cherry picked from commit 376f3bcebdc999cc737d9052109cc33b573b3a8b)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 869cbd2b31024e70d574527b8c6851bf2ebbe483)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0d57bb9079c9..c0b694810ff4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -920,9 +920,8 @@ static __init void uv_rtc_init(void)
/*
* percpu heartbeat timer
*/
-static void uv_heartbeat(unsigned long ignored)
+static void uv_heartbeat(struct timer_list *timer)
{
- struct timer_list *timer = &uv_scir_info->timer;
unsigned char bits = uv_scir_info->state;

/* Flip heartbeat bit: */
@@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;

uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
- setup_pinned_timer(timer, uv_heartbeat, cpu);
+ timer_setup(timer, uv_heartbeat, TIMER_PINNED);
timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
add_timer_on(timer, cpu);
uv_cpu_scir_info(cpu)->enabled = 1;
--
2.14.2

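For reference, the general shape of the timer_setup()/from_timer() conversion, sketched with a made-up structure in place of the real uv_scir_info; it assumes the timer API backports present earlier in this series:

#include <linux/timer.h>
#include <linux/jiffies.h>

struct scir_info {
	struct timer_list timer;
	unsigned char state;
};

static void heartbeat(struct timer_list *t)
{
	/* Recover the enclosing object from the timer_list pointer. */
	struct scir_info *info = from_timer(info, t, timer);

	info->state ^= 1;
	mod_timer(&info->timer, jiffies + HZ);
}

static void start_heartbeat(struct scir_info *info)
{
	timer_setup(&info->timer, heartbeat, TIMER_PINNED);
	info->timer.expires = jiffies + HZ;
	add_timer(&info->timer);
}
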
@ -1,73 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Date: Sat, 14 Oct 2017 20:17:54 +0530
Subject: [PATCH] objtool: Print top level commands on incorrect usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Print top-level objtool commands, along with the error on incorrect
command line usage. Objtool's command line parser exits with code 129
for incorrect usage. Convert the cmd_usage() exit code also, to maintain
consistency across objtool.

After the patch:

$ ./objtool -j

Unknown option: -j

usage: objtool COMMAND [ARGS]

Commands:
check Perform stack metadata validation on an object file
orc Generate in-place ORC unwind tables for an object file

$ echo $?
129

Signed-off-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1507992474-16142-1-git-send-email-kamalesh@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 6a93bb7e4a7d6670677d5b0eb980936eb9cc5d2e)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit cd75c9c55a5f288e1d3f20c48c5c4c2caf3966e8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
tools/objtool/objtool.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 31e0f9143840..07f329919828 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -70,7 +70,7 @@ static void cmd_usage(void)

printf("\n");

- exit(1);
+ exit(129);
}

static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
break;
} else {
fprintf(stderr, "Unknown option: %s\n", cmd);
- fprintf(stderr, "\n Usage: %s\n",
- objtool_usage_string);
- exit(1);
+ cmd_usage();
}

(*argv)++;
--
2.14.2

@ -1,64 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 Oct 2017 19:39:35 +0200
Subject: [PATCH] x86/cpuid: Prevent out of bound access in do_clear_cpu_cap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

do_clear_cpu_cap() allocates a bitmap to keep track of disabled feature
dependencies. That bitmap is sized NCAPINTS * BITS_PER_INIT. The possible
'features' which can be handed in are larger than this, because after the
capabilities the bug 'feature' bits occupy another 32bit. Not really
obvious...

So clearing any of the misfeature bits, as 32bit does for the F00F bug,
accesses that bitmap out of bounds thereby corrupting the stack.

Size the bitmap properly and add a sanity check to catch accidental out of
bound access.

Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies")
Reported-by: kernel test robot <xiaolong.ye@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Link: https://lkml.kernel.org/r/20171018022023.GA12058@yexl-desktop
(cherry picked from commit 57b8b1a1856adaa849d02d547411a553a531022b)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 4b3a90bd20b35a97fd9ca6f6a71131f4417782e4)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/cpuid-deps.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index e48eb7313120..c1d49842a411 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -75,11 +75,17 @@ static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
__clear_cpu_cap(c, feature);
}

+/* Take the capabilities and the BUG bits into account */
+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
+
static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
{
- bool changed;
- DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8);
+ DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
const struct cpuid_dep *d;
+ bool changed;
+
+ if (WARN_ON(feature >= MAX_FEATURE_BITS))
+ return;

clear_feature(c, feature);

--
2.14.2

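A quick check of the arithmetic behind the fix, assuming NCAPINTS is 18 and NBUGINTS is 1 in this tree: the old bitmap covered 18 * 32 = 576 bits, while bug-bit indices can reach 607.

#include <stdio.h>

#define NCAPINTS 18	/* assumed capability word count for this tree */
#define NBUGINTS 1	/* assumed bug word count */

int main(void)
{
	unsigned int old_bits = NCAPINTS * sizeof(unsigned int) * 8;
	unsigned int new_bits = (NCAPINTS + NBUGINTS) * sizeof(unsigned int) * 8;

	printf("bitmap size: %u -> %u bits\n", old_bits, new_bits);	/* 576 -> 608 */
	return 0;
}
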
@ -1,124 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Wed, 18 Oct 2017 10:21:07 -0700
Subject: [PATCH] x86/entry: Use SYSCALL_DEFINE() macros for sys_modify_ldt()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

We do not have tracepoints for sys_modify_ldt() because we define
it directly instead of using the normal SYSCALL_DEFINEx() macros.

However, there is a reason sys_modify_ldt() does not use the macros:
it has an 'int' return type instead of 'unsigned long'. This is
a bug, but it's a bug cemented in the ABI.

What does this mean? If we return -EINVAL from a function that
returns 'int', we have 0x00000000ffffffea in %rax. But, if we
return -EINVAL from a function returning 'unsigned long', we end
up with 0xffffffffffffffea in %rax, which is wrong.

To work around this and maintain the 'int' behavior while using
the SYSCALL_DEFINEx() macros, we add a cast to 'unsigned int'
in both implementations of sys_modify_ldt().

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Brian Gerst <brgerst@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20171018172107.1A79C532@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit da20ab35180780e4a6eadc804544f1fa967f3567)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d865f635f4b2c3307e79de9be5c49ea8bd4c43a6)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/syscalls.h | 2 +-
arch/x86/kernel/ldt.c | 16 +++++++++++++---
arch/x86/um/ldt.c | 7 +++++--
3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcafe27a6..bad25bb80679 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
asmlinkage long sys_iopl(unsigned int);

/* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);

/* kernel/signal.c */
asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index f0e64db18ac8..0402d44deb4d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
@@ -294,8 +295,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
return error;
}

-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
- unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
int ret = -ENOSYS;

@@ -313,5 +314,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
ret = write_ldt(ptr, bytecount, 0);
break;
}
- return ret;
+ /*
+ * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+ * return type, but the ABI for sys_modify_ldt() expects
+ * 'int'. This cast gives us an int-sized value in %rax
+ * for the return code. The 'unsigned' is necessary so
+ * the compiler does not try to sign-extend the negative
+ * return codes into the high half of the register when
+ * taking the value from int->long.
+ */
+ return (unsigned int)ret;
}
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb5df43..3ee234b6234d 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <os.h>
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
mm->arch.ldt.entry_count = 0;
}

-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
- return do_modify_ldt_skas(func, ptr, bytecount);
+ /* See non-um modify_ldt() for why we do this cast */
+ return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
}
--
2.14.2

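The %rax values quoted above are easy to reproduce in userspace; this sketch shows why the 'unsigned int' cast keeps -EINVAL int-sized when it travels through a 'long' return type:

#include <stdio.h>

#define EINVAL 22

static long ret_long(void)	{ return -EINVAL; }
static long ret_cast(void)	{ return (unsigned int)-EINVAL; }

int main(void)
{
	printf("%#lx\n", (unsigned long)ret_long());	/* 0xffffffffffffffea */
	printf("%#lx\n", (unsigned long)ret_cast());	/* 0xffffffea */
	return 0;
}
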
@ -1,141 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 29 Sep 2017 17:08:16 +0300
Subject: [PATCH] mm/sparsemem: Allocate mem_section at runtime for
CONFIG_SPARSEMEM_EXTREME=y
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Size of the mem_section[] array depends on the size of the physical address space.

In preparation for boot-time switching between paging modes on x86-64
we need to make the allocation of mem_section[] dynamic, because otherwise
we waste a lot of RAM: with CONFIG_NODE_SHIFT=10, mem_section[] size is 32kB
for 4-level paging and 2MB for 5-level paging mode.

The patch allocates the array on the first call to sparse_memory_present_with_active_regions().

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-2-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 83e3c48729d9ebb7af5a31a504f3fd6aff0348c4)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit c70f71e01a0ae5d884abae0424618abe90b82011)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
include/linux/mmzone.h | 6 +++++-
mm/page_alloc.c | 10 ++++++++++
mm/sparse.c | 17 +++++++++++------
3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fc14b8b3f6ce..9c6c001a8c6c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1137,13 +1137,17 @@ struct mem_section {
#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)

#ifdef CONFIG_SPARSEMEM_EXTREME
-extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+extern struct mem_section **mem_section;
#else
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

static inline struct mem_section *__nr_to_section(unsigned long nr)
{
+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (!mem_section)
+ return NULL;
+#endif
if (!mem_section[SECTION_NR_TO_ROOT(nr)])
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1423da8dd16f..66eb23ab658d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5707,6 +5707,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
unsigned long start_pfn, end_pfn;
int i, this_nid;

+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (!mem_section) {
+ unsigned long size, align;
+
+ size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+ align = 1 << (INTERNODE_CACHE_SHIFT);
+ mem_section = memblock_virt_alloc(size, align);
+ }
+#endif
+
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
memory_present(this_nid, start_pfn, end_pfn);
}
diff --git a/mm/sparse.c b/mm/sparse.c
index cdce7a7bb3f3..308a0789d1bb 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -22,8 +22,7 @@
* 1) mem_section - memory sections, mem_map's for valid memory
*/
#ifdef CONFIG_SPARSEMEM_EXTREME
-struct mem_section *mem_section[NR_SECTION_ROOTS]
- ____cacheline_internodealigned_in_smp;
+struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
____cacheline_internodealigned_in_smp;
@@ -104,7 +103,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
int __section_nr(struct mem_section* ms)
{
unsigned long root_nr;
- struct mem_section* root;
+ struct mem_section *root = NULL;

for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -115,7 +114,7 @@ int __section_nr(struct mem_section* ms)
break;
}

- VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+ VM_BUG_ON(!root);

return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
@@ -333,11 +332,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
unsigned long usemap_snr, pgdat_snr;
- static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
- static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+ static unsigned long old_usemap_snr;
+ static unsigned long old_pgdat_snr;
struct pglist_data *pgdat = NODE_DATA(nid);
int usemap_nid;

+ /* First call */
+ if (!old_usemap_snr) {
+ old_usemap_snr = NR_MEM_SECTIONS;
+ old_pgdat_snr = NR_MEM_SECTIONS;
+ }
+
usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
if (usemap_snr == pgdat_snr)
--
2.14.2

@ -1,244 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Fri, 29 Sep 2017 17:08:18 +0300
Subject: [PATCH] x86/kasan: Use the same shadow offset for 4- and 5-level
paging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

We are going to support boot-time switching between 4- and 5-level
paging. For KASAN it means we cannot have different KASAN_SHADOW_OFFSET
for different paging modes: the constant is passed to gcc to generate
code and cannot be changed at runtime.

This patch changes KASAN code to use 0xdffffc0000000000 as shadow offset
for both 4- and 5-level paging.

For 5-level paging it means that the shadow memory region is not aligned to
PGD boundary anymore and we have to handle unaligned parts of the region
properly.

In addition, we have to exclude paravirt code from KASAN instrumentation
as we now use set_pgd() before KASAN is fully ready.

[kirill.shutemov@linux.intel.com: cleanup, changelog message]
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-4-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 12a8cc7fcf54a8575f094be1e99032ec38aa045c)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 2ce428150e002623aa0ed2a1ab840fde5f860f32)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
Documentation/x86/x86_64/mm.txt | 2 +-
arch/x86/kernel/Makefile | 3 +-
arch/x86/mm/kasan_init_64.c | 101 +++++++++++++++++++++++++++++++---------
arch/x86/Kconfig | 1 -
4 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index b0798e281aa6..3448e675b462 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5bf0d5a473b4..aa059806201d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,7 +24,8 @@ endif
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
-KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_paravirt.o := n

OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 02c9d7553409..464089f33e80 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,8 @@
extern pgd_t early_top_pgt[PTRS_PER_PGD];
extern struct range pfn_mapped[E820_MAX_ENTRIES];

+static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
static int __init map_range(struct range *range)
{
unsigned long start;
@@ -30,8 +32,10 @@ static void __init clear_pgds(unsigned long start,
unsigned long end)
{
pgd_t *pgd;
+ /* See comment in kasan_init() */
+ unsigned long pgd_end = end & PGDIR_MASK;

- for (; start < end; start += PGDIR_SIZE) {
+ for (; start < pgd_end; start += PGDIR_SIZE) {
pgd = pgd_offset_k(start);
/*
* With folded p4d, pgd_clear() is nop, use p4d_clear()
@@ -42,29 +46,61 @@ static void __init clear_pgds(unsigned long start,
else
pgd_clear(pgd);
}
+
+ pgd = pgd_offset_k(start);
+ for (; start < end; start += P4D_SIZE)
+ p4d_clear(p4d_offset(pgd, start));
+}
+
+static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
+{
+ unsigned long p4d;
+
+ if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ return (p4d_t *)pgd;
+
+ p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+ p4d += __START_KERNEL_map - phys_base;
+ return (p4d_t *)p4d + p4d_index(addr);
+}
+
+static void __init kasan_early_p4d_populate(pgd_t *pgd,
+ unsigned long addr,
+ unsigned long end)
+{
+ pgd_t pgd_entry;
+ p4d_t *p4d, p4d_entry;
+ unsigned long next;
+
+ if (pgd_none(*pgd)) {
+ pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
+ set_pgd(pgd, pgd_entry);
+ }
+
+ p4d = early_p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+
+ if (!p4d_none(*p4d))
+ continue;
+
+ p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
+ set_p4d(p4d, p4d_entry);
+ } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
}

static void __init kasan_map_early_shadow(pgd_t *pgd)
{
- int i;
- unsigned long start = KASAN_SHADOW_START;
+ /* See comment in kasan_init() */
+ unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;

- for (i = pgd_index(start); start < end; i++) {
- switch (CONFIG_PGTABLE_LEVELS) {
- case 4:
- pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
- _KERNPG_TABLE);
- break;
- case 5:
- pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
- _KERNPG_TABLE);
- break;
- default:
- BUILD_BUG();
- }
- start += PGDIR_SIZE;
- }
+ pgd += pgd_index(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_early_p4d_populate(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
}

#ifdef CONFIG_KASAN_INLINE
@@ -101,7 +137,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);

- for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
+ for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);

kasan_map_early_shadow(early_top_pgt);
@@ -117,12 +153,35 @@ void __init kasan_init(void)
#endif

memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
+
+ /*
+ * We use the same shadow offset for 4- and 5-level paging to
+ * facilitate boot-time switching between paging modes.
+ * As result in 5-level paging mode KASAN_SHADOW_START and
+ * KASAN_SHADOW_END are not aligned to PGD boundary.
+ *
+ * KASAN_SHADOW_START doesn't share PGD with anything else.
+ * We claim whole PGD entry to make things easier.
+ *
+ * KASAN_SHADOW_END lands in the last PGD entry and it collides with
+ * bunch of things like kernel code, modules, EFI mapping, etc.
+ * We need to take extra steps to not overwrite them.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ void *ptr;
+
+ ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+ memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
+ set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
+ __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
+ }
+
load_cr3(early_top_pgt);
__flush_tlb_all();

- clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+ clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);

- kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+ kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
kasan_mem_to_shadow((void *)PAGE_OFFSET));

for (i = 0; i < E820_MAX_ENTRIES; i++) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bf9f03740c30..67d07802ae95 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -300,7 +300,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config KASAN_SHADOW_OFFSET
hex
depends on KASAN
- default 0xdff8000000000000 if X86_5LEVEL
|
||||
default 0xdffffc0000000000
|
||||
|
||||
config HAVE_INTEL_TXT
|
||||
--
|
||||
2.14.2
|
||||
|
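Aside: the "& PGDIR_MASK" rounding this patch adds is easy to check in isolation. The following is a standalone userspace sketch, not kernel code; PGDIR_SHIFT is the 5-level value (48) and the end address is the KASAN shadow end from the mm.txt hunk above. It shows that the shadow end is not PGD-aligned, which is exactly why clear_pgds() must finish the last partial PGD entry with p4d_clear().

#include <stdio.h>
#include <stdint.h>

#define PGDIR_SHIFT 48                     /* 5-level paging: one PGD entry spans 256 TiB */
#define PGDIR_SIZE  (1ULL << PGDIR_SHIFT)
#define PGDIR_MASK  (~(PGDIR_SIZE - 1))

int main(void)
{
    /* KASAN shadow end for 5-level paging, taken from the mm.txt hunk */
    uint64_t end = 0xfffffc0000000000ULL;

    printf("end              = %#llx\n", (unsigned long long)end);
    printf("end & PGDIR_MASK = %#llx\n", (unsigned long long)(end & PGDIR_MASK));
    printf("PGD-aligned: %s\n", (end & (PGDIR_SIZE - 1)) ? "no" : "yes");
    return 0;
}
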
@ -1,80 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 29 Sep 2017 17:08:19 +0300
Subject: [PATCH] x86/xen: Provide pre-built page tables only for
CONFIG_XEN_PV=y and CONFIG_XEN_PVH=y
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Looks like we only need pre-built page tables in the CONFIG_XEN_PV=y and
CONFIG_XEN_PVH=y cases.

Let's not provide them for other configurations.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-5-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 4375c29985f155d7eb2346615d84e62d1b673682)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a883ee7f3c1dc64a8c946543ac598399353d1b03)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/head_64.S | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d081bc7a027d..12daaa0b187f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -37,11 +37,12 @@
*
*/

-#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
+#endif
L3_START_KERNEL = pud_index(__START_KERNEL_map)

.text
@@ -348,10 +349,7 @@ NEXT_PAGE(early_dynamic_pgts)

.data

-#ifndef CONFIG_XEN
-NEXT_PAGE(init_top_pgt)
- .fill 512,8,0
-#else
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
@@ -368,6 +366,9 @@ NEXT_PAGE(level2_ident_pgt)
* Don't set NX because code runs from these pages.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+#else
+NEXT_PAGE(init_top_pgt)
+ .fill 512,8,0
#endif

#ifdef CONFIG_X86_5LEVEL
--
2.14.2

@ -1,316 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 29 Sep 2017 17:08:20 +0300
Subject: [PATCH] x86/xen: Drop 5-level paging support code from the XEN_PV
code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

It was decided 5-level paging is not going to be supported in XEN_PV.

Let's drop the dead code from the XEN_PV code.

Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-6-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 773dd2fca581b0a80e5a33332cc8ee67e5a79cba)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 3fd0b7ef0094fd8bb3c8172d9b137ebe0d81ecbc)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/xen/mmu_pv.c | 159 +++++++++++++++++++-------------------------------
1 file changed, 60 insertions(+), 99 deletions(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index ba76f3ce997f..45bb2d462e44 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -469,7 +469,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);

-#if CONFIG_PGTABLE_LEVELS == 4
+#ifdef CONFIG_X86_64
__visible pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);
@@ -558,7 +558,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)

xen_mc_issue(PARAVIRT_LAZY_MMU);
}
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+#endif /* CONFIG_X86_64 */

static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -600,21 +600,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
bool last, unsigned long limit)
{
- int i, nr, flush = 0;
+ int flush = 0;
+ pud_t *pud;

- nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
- for (i = 0; i < nr; i++) {
- pud_t *pud;

- if (p4d_none(p4d[i]))
- continue;
+ if (p4d_none(*p4d))
+ return flush;

- pud = pud_offset(&p4d[i], 0);
- if (PTRS_PER_PUD > 1)
- flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
- flush |= xen_pud_walk(mm, pud, func,
- last && i == nr - 1, limit);
- }
+ pud = pud_offset(p4d, 0);
+ if (PTRS_PER_PUD > 1)
+ flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+ flush |= xen_pud_walk(mm, pud, func, last, limit);
return flush;
}

@@ -664,8 +660,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
continue;

p4d = p4d_offset(&pgd[i], 0);
- if (PTRS_PER_P4D > 1)
- flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
}

@@ -1196,22 +1190,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
{
pgd_t *pgd;
p4d_t *p4d;
- unsigned int i;
bool unpin;

unpin = (vaddr == 2 * PGDIR_SIZE);
vaddr &= PMD_MASK;
pgd = pgd_offset_k(vaddr);
p4d = p4d_offset(pgd, 0);
- for (i = 0; i < PTRS_PER_P4D; i++) {
- if (p4d_none(p4d[i]))
- continue;
- xen_cleanmfnmap_p4d(p4d + i, unpin);
- }
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- set_pgd(pgd, __pgd(0));
- xen_cleanmfnmap_free_pgtbl(p4d, unpin);
- }
+ if (!p4d_none(*p4d))
+ xen_cleanmfnmap_p4d(p4d, unpin);
}

static void __init xen_pagetable_p2m_free(void)
@@ -1717,7 +1703,7 @@ static void xen_release_pmd(unsigned long pfn)
xen_release_ptpage(pfn, PT_PMD);
}

-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2054,13 +2040,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
*/
void __init xen_relocate_p2m(void)
{
- phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
+ phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
- int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
+ int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
pte_t *pt;
pmd_t *pmd;
pud_t *pud;
- p4d_t *p4d = NULL;
pgd_t *pgd;
unsigned long *new_p2m;
int save_pud;
@@ -2070,11 +2055,7 @@ void __init xen_relocate_p2m(void)
n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
- if (PTRS_PER_P4D > 1)
- n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
- else
- n_p4d = 0;
- n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
+ n_frames = n_pte + n_pt + n_pmd + n_pud;

new_area = xen_find_free_area(PFN_PHYS(n_frames));
if (!new_area) {
@@ -2090,76 +2071,56 @@ void __init xen_relocate_p2m(void)
* To avoid any possible virtual address collision, just use
* 2 * PUD_SIZE for the new area.
*/
- p4d_phys = new_area;
- pud_phys = p4d_phys + PFN_PHYS(n_p4d);
+ pud_phys = new_area;
pmd_phys = pud_phys + PFN_PHYS(n_pud);
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;

pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
- idx_p4d = 0;
save_pud = n_pud;
- do {
- if (n_p4d > 0) {
- p4d = early_memremap(p4d_phys, PAGE_SIZE);
- clear_page(p4d);
- n_pud = min(save_pud, PTRS_PER_P4D);
- }
- for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
- pud = early_memremap(pud_phys, PAGE_SIZE);
- clear_page(pud);
- for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
- idx_pmd++) {
- pmd = early_memremap(pmd_phys, PAGE_SIZE);
- clear_page(pmd);
- for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
- idx_pt++) {
- pt = early_memremap(pt_phys, PAGE_SIZE);
- clear_page(pt);
- for (idx_pte = 0;
- idx_pte < min(n_pte, PTRS_PER_PTE);
- idx_pte++) {
- set_pte(pt + idx_pte,
- pfn_pte(p2m_pfn, PAGE_KERNEL));
- p2m_pfn++;
- }
- n_pte -= PTRS_PER_PTE;
- early_memunmap(pt, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pt_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
- PFN_DOWN(pt_phys));
- set_pmd(pmd + idx_pt,
- __pmd(_PAGE_TABLE | pt_phys));
- pt_phys += PAGE_SIZE;
+ for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+ pud = early_memremap(pud_phys, PAGE_SIZE);
+ clear_page(pud);
+ for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+ idx_pmd++) {
+ pmd = early_memremap(pmd_phys, PAGE_SIZE);
+ clear_page(pmd);
+ for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+ idx_pt++) {
+ pt = early_memremap(pt_phys, PAGE_SIZE);
+ clear_page(pt);
+ for (idx_pte = 0;
+ idx_pte < min(n_pte, PTRS_PER_PTE);
+ idx_pte++) {
+ set_pte(pt + idx_pte,
+ pfn_pte(p2m_pfn, PAGE_KERNEL));
+ p2m_pfn++;
}
- n_pt -= PTRS_PER_PMD;
- early_memunmap(pmd, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pmd_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
- PFN_DOWN(pmd_phys));
- set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
- pmd_phys += PAGE_SIZE;
+ n_pte -= PTRS_PER_PTE;
+ early_memunmap(pt, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pt_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+ PFN_DOWN(pt_phys));
+ set_pmd(pmd + idx_pt,
+ __pmd(_PAGE_TABLE | pt_phys));
+ pt_phys += PAGE_SIZE;
}
- n_pmd -= PTRS_PER_PUD;
- early_memunmap(pud, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pud_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
- if (n_p4d > 0)
- set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
- else
- set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
- pud_phys += PAGE_SIZE;
- }
- if (n_p4d > 0) {
- save_pud -= PTRS_PER_P4D;
- early_memunmap(p4d, PAGE_SIZE);
- make_lowmem_page_readonly(__va(p4d_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
- set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
- p4d_phys += PAGE_SIZE;
+ n_pt -= PTRS_PER_PMD;
+ early_memunmap(pmd, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pmd_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+ PFN_DOWN(pmd_phys));
+ set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+ pmd_phys += PAGE_SIZE;
}
- } while (++idx_p4d < n_p4d);
+ n_pmd -= PTRS_PER_PUD;
+ early_memunmap(pud, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pud_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+ set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+ pud_phys += PAGE_SIZE;
+ }

/* Now copy the old p2m info to the new area. */
memcpy(new_p2m, xen_p2m_addr, size);
@@ -2386,7 +2347,7 @@ static void __init xen_post_allocator_init(void)
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
pv_mmu_ops.set_p4d = xen_set_p4d;
#endif

@@ -2396,7 +2357,7 @@ static void __init xen_post_allocator_init(void)
pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
pv_mmu_ops.release_pte = xen_release_pte;
pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
pv_mmu_ops.alloc_pud = xen_alloc_pud;
pv_mmu_ops.release_pud = xen_release_pud;
#endif
@@ -2460,14 +2421,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),

-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
.set_p4d = xen_set_p4d_hyper,

.alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init,
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+#endif /* CONFIG_X86_64 */

.activate_mm = xen_activate_mm,
.dup_mmap = xen_dup_mmap,
--
2.14.2

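Aside: the single-entry checks this patch substitutes for the old loops rely on the p4d level being "folded" when 5-level paging is compiled out. A standalone userspace sketch (not kernel code; PTRS_PER_P4D == 1 is the assumed folded value) of why the loops collapse, mirroring the early_p4d_offset() trick in the KASAN patch earlier:

#include <stdio.h>

#define PTRS_PER_P4D 1          /* folded p4d on 4-level x86-64 (assumption) */

typedef unsigned long pgd_t;
typedef unsigned long p4d_t;

/* With a folded p4d, "offsetting" into the p4d just hands back the pgd
 * slot itself -- there is only one entry and nothing to index. */
static p4d_t *p4d_offset_folded(pgd_t *pgd, unsigned long addr)
{
    (void)addr;
    return (p4d_t *)pgd;
}

int main(void)
{
    pgd_t pgd = 0;
    int visited = 0;

    for (int i = 0; i < PTRS_PER_P4D; i++)   /* the loop shape the patch deletes */
        visited++;

    printf("p4d entries visited: %d\n", visited);   /* prints 1 */
    printf("p4d aliases pgd slot: %d\n",
           (void *)p4d_offset_folded(&pgd, 0) == (void *)&pgd);
    return 0;
}
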
@ -1,88 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dongjiu Geng <gengdongjiu@huawei.com>
Date: Tue, 17 Oct 2017 16:02:20 +0800
Subject: [PATCH] ACPI / APEI: remove the unused dead-code for SEA/NMI
notification type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

For the SEA notification, the two functions ghes_sea_add() and
ghes_sea_remove() are only called when CONFIG_ACPI_APEI_SEA
is defined. If not, it will return errors in ghes_probe()
and not continue. If the probe fails, ghes_sea_remove()
also has no chance to be called. Hence, remove the unnecessary
handling when CONFIG_ACPI_APEI_SEA is not defined.

For the NMI notification, it has the same issue as the SEA notification,
so also remove the unused dead code for it.

Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
Tested-by: Tyler Baicar <tbaicar@codeaurora.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit c49870e89f4d2c21c76ebe90568246bb0f3572b7)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 55f73c32ba6438e8886f348722d2b25aef129d40)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
drivers/acpi/apei/ghes.c | 33 +++++----------------------------
1 file changed, 5 insertions(+), 28 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3628078ee351..4827176f838d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -850,17 +850,8 @@ static void ghes_sea_remove(struct ghes *ghes)
synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
-static inline void ghes_sea_add(struct ghes *ghes)
-{
- pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
- ghes->generic->header.source_id);
-}
-
-static inline void ghes_sea_remove(struct ghes *ghes)
-{
- pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
- ghes->generic->header.source_id);
-}
+static inline void ghes_sea_add(struct ghes *ghes) { }
+static inline void ghes_sea_remove(struct ghes *ghes) { }
#endif /* CONFIG_ACPI_APEI_SEA */

#ifdef CONFIG_HAVE_ACPI_APEI_NMI
@@ -1062,23 +1053,9 @@ static void ghes_nmi_init_cxt(void)
init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
-static inline void ghes_nmi_add(struct ghes *ghes)
-{
- pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
- ghes->generic->header.source_id);
- BUG();
-}
-
-static inline void ghes_nmi_remove(struct ghes *ghes)
-{
- pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
- ghes->generic->header.source_id);
- BUG();
-}
-
-static inline void ghes_nmi_init_cxt(void)
-{
-}
+static inline void ghes_nmi_add(struct ghes *ghes) { }
+static inline void ghes_nmi_remove(struct ghes *ghes) { }
+static inline void ghes_nmi_init_cxt(void) { }
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */

static int ghes_probe(struct platform_device *ghes_dev)
--
2.14.2

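Aside: the pattern this patch converges on -- empty static inline stubs for compiled-out features, so callers need no #ifdefs and no unreachable error paths -- is easy to see in isolation. A minimal compilable sketch (userspace, with a stand-in struct; the real layout lives in ghes.c):

#include <stdio.h>

struct ghes { int id; };                  /* stand-in for the real struct */

#ifdef CONFIG_ACPI_APEI_SEA
void ghes_sea_add(struct ghes *ghes);     /* real implementations elsewhere */
void ghes_sea_remove(struct ghes *ghes);
#else
/* Feature compiled out: empty stubs, no error message, no BUG().  The
 * probe path already rejected SEA sources, so these can never be asked
 * to do real work. */
static inline void ghes_sea_add(struct ghes *ghes) { (void)ghes; }
static inline void ghes_sea_remove(struct ghes *ghes) { (void)ghes; }
#endif

int main(void)
{
    struct ghes g = { .id = 1 };
    ghes_sea_add(&g);                     /* no-op when the feature is off */
    ghes_sea_remove(&g);
    printf("stubs are no-ops\n");
    return 0;
}
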
@ -1,78 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 20 Oct 2017 11:21:35 -0500
Subject: [PATCH] x86/asm: Don't use the confusing '.ifeq' directive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

I find the '.ifeq <expression>' directive to be confusing. Reading it
quickly seems to suggest its opposite meaning, or that it's missing an
argument.

Improve readability by replacing all of its x86 uses with
'.if <expression> == 0'.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrei Vagin <avagin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/757da028e802c7e98d23fbab8d234b1063e161cf.1508516398.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 82c62fa0c49aa305104013cee4468772799bb391)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 981dedac1061fb47d0b04e07f6752be195d7e41a)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 2 +-
arch/x86/kernel/head_32.S | 2 +-
arch/x86/kernel/head_64.S | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 2e4fc6425f47..34adfe0221d2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -830,7 +830,7 @@ ENTRY(\sym)

ASM_CLAC

- .ifeq \has_error_code
+ .if \has_error_code == 0
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 1f85ee8f9439..337a65377baf 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -435,7 +435,7 @@ ENTRY(early_idt_handler_array)
# 24(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+ .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
pushl $0 # Dummy error code, to make stack frame uniform
.endif
pushl $i # 20(%esp) Vector number
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 12daaa0b187f..a2d8541b1da4 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -258,7 +258,7 @@ ENDPROC(start_cpu0)
ENTRY(early_idt_handler_array)
i = 0
.rept NUM_EXCEPTION_VECTORS
- .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+ .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
UNWIND_HINT_IRET_REGS
pushq $0 # Dummy error code, to make stack frame uniform
.else
--
2.14.2

@ -1,62 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 27 Oct 2017 13:11:10 +0900
Subject: [PATCH] x86/build: Beautify build log of syscall headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This makes the build log look nicer.

Before:
SYSTBL arch/x86/entry/syscalls/../../include/generated/asm/syscalls_32.h
SYSHDR arch/x86/entry/syscalls/../../include/generated/asm/unistd_32_ia32.h
SYSHDR arch/x86/entry/syscalls/../../include/generated/asm/unistd_64_x32.h
SYSTBL arch/x86/entry/syscalls/../../include/generated/asm/syscalls_64.h
SYSHDR arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_32.h
SYSHDR arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_64.h
SYSHDR arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_x32.h

After:
SYSTBL arch/x86/include/generated/asm/syscalls_32.h
SYSHDR arch/x86/include/generated/asm/unistd_32_ia32.h
SYSHDR arch/x86/include/generated/asm/unistd_64_x32.h
SYSTBL arch/x86/include/generated/asm/syscalls_64.h
SYSHDR arch/x86/include/generated/uapi/asm/unistd_32.h
SYSHDR arch/x86/include/generated/uapi/asm/unistd_64.h
SYSHDR arch/x86/include/generated/uapi/asm/unistd_x32.h

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: linux-kbuild@vger.kernel.org
Link: http://lkml.kernel.org/r/1509077470-2735-1-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit af8e947079a7dab0480b5d6db6b093fd04b86fc9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d945957924e9b1a469516b4029fd384138c2cb69)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/syscalls/Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index 57aa59fd140c..e34c7a931994 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -1,5 +1,5 @@
-out := $(obj)/../../include/generated/asm
-uapi := $(obj)/../../include/generated/uapi/asm
+out := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm

# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
--
2.14.2

@ -1,90 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Sat, 28 Oct 2017 09:30:38 +0800
Subject: [PATCH] x86/mm/64: Rename the register_page_bootmem_memmap() 'size'
parameter to 'nr_pages'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

register_page_bootmem_memmap()'s 3rd 'size' parameter is named
in a somewhat misleading fashion - rename it to 'nr_pages' which
makes the units of it much clearer.

Meanwhile rename the existing local variable 'nr_pages' to
'nr_pmd_pages', a more expressive name, to avoid conflict with
new function parameter 'nr_pages'.

(Also clean up the unnecessary parentheses in which get_order() is called.)

Signed-off-by: Baoquan He <bhe@redhat.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: akpm@linux-foundation.org
Link: http://lkml.kernel.org/r/1509154238-23250-1-git-send-email-bhe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 15670bfe19905b1dcbb63137f40d718b59d84479)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d73ad1d31ef8a44c6e5977c5123cbaa6d02e2035)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
include/linux/mm.h | 2 +-
arch/x86/mm/init_64.c | 10 +++++-----
2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 07630442bbf2..97f6ca707010 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2475,7 +2475,7 @@ void vmemmap_populate_print_last(void);
void vmemmap_free(unsigned long start, unsigned long end);
#endif
void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
- unsigned long size);
+ unsigned long nr_pages);

enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 136422d7d539..902983c8ea8c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1418,16 +1418,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)

#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
void register_page_bootmem_memmap(unsigned long section_nr,
- struct page *start_page, unsigned long size)
+ struct page *start_page, unsigned long nr_pages)
{
unsigned long addr = (unsigned long)start_page;
- unsigned long end = (unsigned long)(start_page + size);
+ unsigned long end = (unsigned long)(start_page + nr_pages);
unsigned long next;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
- unsigned int nr_pages;
+ unsigned int nr_pmd_pages;
struct page *page;

for (; addr < end; addr = next) {
@@ -1474,9 +1474,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
if (pmd_none(*pmd))
continue;

- nr_pages = 1 << (get_order(PMD_SIZE));
+ nr_pmd_pages = 1 << get_order(PMD_SIZE);
page = pmd_page(*pmd);
- while (nr_pages--)
+ while (nr_pmd_pages--)
get_page_bootmem(section_nr, page++,
SECTION_INFO);
}
--
2.14.2

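Aside: the rename matters because pointer arithmetic on struct page * advances in elements, not bytes -- the third argument has always been a page count, and the old name 'size' hid that. A standalone sketch (userspace, with a stand-in struct page):

#include <stdio.h>

struct page { unsigned long flags; };   /* stand-in for the kernel's struct page */

int main(void)
{
    struct page map[8];
    unsigned long nr_pages = 8;

    /* 'map + nr_pages' advances nr_pages elements, i.e. nr_pages pages */
    struct page *end = map + nr_pages;

    printf("elements: %lu, bytes spanned: %zu\n",
           nr_pages, (size_t)((char *)end - (char *)map));
    return 0;
}
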
@ -1,86 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Gayatri Kammela <gayatri.kammela@intel.com>
Date: Mon, 30 Oct 2017 18:20:29 -0700
Subject: [PATCH] x86/cpufeatures: Enable new SSE/AVX/AVX512 CPU features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add a few new SSE/AVX/AVX512 instruction groups/features for enumeration
in /proc/cpuinfo: AVX512_VBMI2, GFNI, VAES, VPCLMULQDQ, AVX512_VNNI,
AVX512_BITALG.

CPUID.(EAX=7,ECX=0):ECX[bit 6] AVX512_VBMI2
CPUID.(EAX=7,ECX=0):ECX[bit 8] GFNI
CPUID.(EAX=7,ECX=0):ECX[bit 9] VAES
CPUID.(EAX=7,ECX=0):ECX[bit 10] VPCLMULQDQ
CPUID.(EAX=7,ECX=0):ECX[bit 11] AVX512_VNNI
CPUID.(EAX=7,ECX=0):ECX[bit 12] AVX512_BITALG

Detailed information of CPUID bits for these features can be found
in the Intel Architecture Instruction Set Extensions and Future Features
Programming Interface document (refer to Table 1-1. and Table 1-2.).
A copy of this document is available at
https://bugzilla.kernel.org/show_bug.cgi?id=197239

Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Ricardo Neri <ricardo.neri@intel.com>
Cc: Yang Zhong <yang.zhong@intel.com>
Cc: bp@alien8.de
Link: http://lkml.kernel.org/r/1509412829-23380-1-git-send-email-gayatri.kammela@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit c128dbfa0f879f8ce7b79054037889b0b2240728)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b29eb29c5aca4708d66fa977db40c779366636a2)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/cpufeatures.h | 6 ++++++
arch/x86/kernel/cpu/cpuid-deps.c | 6 ++++++
2 files changed, 12 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f4e145c4b06f..c465bd6613ed 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -297,6 +297,12 @@
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index c1d49842a411..c21f22d836ad 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -50,6 +50,12 @@ const static struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
--
2.14.2

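Aside: the same leaf-7 ECX bits the patch enumerates can be probed from userspace with GCC/clang's <cpuid.h>. A sketch, using the bit positions listed in the commit message above:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
        return 1;                         /* CPUID leaf 7 not supported */

    printf("AVX512_VBMI2:  %d\n", !!(ecx & (1u << 6)));
    printf("GFNI:          %d\n", !!(ecx & (1u << 8)));
    printf("VAES:          %d\n", !!(ecx & (1u << 9)));
    printf("VPCLMULQDQ:    %d\n", !!(ecx & (1u << 10)));
    printf("AVX512_VNNI:   %d\n", !!(ecx & (1u << 11)));
    printf("AVX512_BITALG: %d\n", !!(ecx & (1u << 12)));
    return 0;
}
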
@ -1,363 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 27 Oct 2017 13:25:28 -0700
Subject: [PATCH] x86/mm: Relocate page fault error codes to traps.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Up to this point, only fault.c used the definitions of the page fault error
codes. Thus, it made sense to keep them within such file. Other portions of
code might be interested in those definitions too. For instance, the User-
Mode Instruction Prevention emulation code will use such definitions to
emulate a page fault when it is unable to successfully copy the results
of the emulated instructions to user space.

While relocating the error code enumeration, the prefix X86_ is used to
make it consistent with the rest of the definitions in traps.h. Of course,
code using the enumeration had to be updated as well. No functional changes
were performed.

Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: ricardo.neri@intel.com
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Link: https://lkml.kernel.org/r/1509135945-13762-2-git-send-email-ricardo.neri-calderon@linux.intel.com

(cherry picked from commit 1067f030994c69ca1fba8c607437c8895dcf8509)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a85a07ab9111e3c78797c20b60a664dbd5db4981)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/traps.h | 18 +++++++++
arch/x86/mm/fault.c | 88 +++++++++++++++++---------------------------
2 files changed, 52 insertions(+), 54 deletions(-)

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index feb89dbe359d..8e5bf86f87e5 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -162,4 +162,22 @@ enum {
X86_TRAP_IRET = 32, /* 32, IRET Exception */
};

+/*
+ * Page fault error code bits:
+ *
+ * bit 0 == 0: no page found 1: protection fault
+ * bit 1 == 0: read access 1: write access
+ * bit 2 == 0: kernel-mode access 1: user-mode access
+ * bit 3 == 1: use of reserved bit detected
+ * bit 4 == 1: fault was an instruction fetch
+ * bit 5 == 1: protection keys block access
+ */
+enum x86_pf_error_code {
+ X86_PF_PROT = 1 << 0,
+ X86_PF_WRITE = 1 << 1,
+ X86_PF_USER = 1 << 2,
+ X86_PF_RSVD = 1 << 3,
+ X86_PF_INSTR = 1 << 4,
+ X86_PF_PK = 1 << 5,
+};
#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4ee9eb916826..d3a57e7ad311 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -28,26 +28,6 @@
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>

-/*
- * Page fault error code bits:
- *
- * bit 0 == 0: no page found 1: protection fault
- * bit 1 == 0: read access 1: write access
- * bit 2 == 0: kernel-mode access 1: user-mode access
- * bit 3 == 1: use of reserved bit detected
- * bit 4 == 1: fault was an instruction fetch
- * bit 5 == 1: protection keys block access
- */
-enum x86_pf_error_code {
-
- PF_PROT = 1 << 0,
- PF_WRITE = 1 << 1,
- PF_USER = 1 << 2,
- PF_RSVD = 1 << 3,
- PF_INSTR = 1 << 4,
- PF_PK = 1 << 5,
-};
-
/*
* Returns 0 if mmiotrace is disabled, or if the fault is not
* handled by mmiotrace:
@@ -149,7 +129,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* If it was a exec (instruction fetch) fault on NX page, then
* do not ignore the fault:
*/
- if (error_code & PF_INSTR)
+ if (error_code & X86_PF_INSTR)
return 0;

instr = (void *)convert_ip_to_linear(current, regs);
@@ -179,7 +159,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* siginfo so userspace can discover which protection key was set
* on the PTE.
*
- * If we get here, we know that the hardware signaled a PF_PK
+ * If we get here, we know that the hardware signaled a X86_PF_PK
* fault and that there was a VMA once we got in the fault
* handler. It does *not* guarantee that the VMA we find here
* was the one that we faulted on.
@@ -204,7 +184,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
/*
* force_sig_info_fault() is called from a number of
* contexts, some of which have a VMA and some of which
- * do not. The PF_PK handing happens after we have a
+ * do not. The X86_PF_PK handing happens after we have a
* valid VMA, so we should never reach this without a
* valid VMA.
*/
@@ -693,7 +673,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
if (!oops_may_print())
return;

- if (error_code & PF_INSTR) {
+ if (error_code & X86_PF_INSTR) {
unsigned int level;
pgd_t *pgd;
pte_t *pte;
@@ -775,7 +755,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
*/
if (current->thread.sig_on_uaccess_err && signal) {
tsk->thread.trap_nr = X86_TRAP_PF;
- tsk->thread.error_code = error_code | PF_USER;
+ tsk->thread.error_code = error_code | X86_PF_USER;
tsk->thread.cr2 = address;

/* XXX: hwpoison faults will set the wrong code. */
@@ -894,7 +874,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
struct task_struct *tsk = current;

/* User mode accesses just cause a SIGSEGV */
- if (error_code & PF_USER) {
+ if (error_code & X86_PF_USER) {
/*
* It's possible to have interrupts off here:
*/
@@ -915,7 +895,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
* Instruction fetch faults in the vsyscall page might need
* emulation.
*/
- if (unlikely((error_code & PF_INSTR) &&
+ if (unlikely((error_code & X86_PF_INSTR) &&
((address & ~0xfff) == VSYSCALL_ADDR))) {
if (emulate_vsyscall(regs, address))
return;
@@ -928,7 +908,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
* are always protection faults.
*/
if (address >= TASK_SIZE_MAX)
- error_code |= PF_PROT;
+ error_code |= X86_PF_PROT;

if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
@@ -989,11 +969,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,

if (!boot_cpu_has(X86_FEATURE_OSPKE))
return false;
- if (error_code & PF_PK)
+ if (error_code & X86_PF_PK)
return true;
/* this checks permission keys on the VMA: */
- if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
- (error_code & PF_INSTR), foreign))
+ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+ (error_code & X86_PF_INSTR), foreign))
return true;
return false;
}
@@ -1021,7 +1001,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
int code = BUS_ADRERR;

/* Kernel mode? Handle exceptions or die: */
- if (!(error_code & PF_USER)) {
+ if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
return;
}
@@ -1049,14 +1029,14 @@ static noinline void
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 *pkey, unsigned int fault)
{
- if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+ if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, 0, 0);
return;
}

if (fault & VM_FAULT_OOM) {
/* Kernel mode? Handle exceptions or die: */
- if (!(error_code & PF_USER)) {
+ if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address,
SIGSEGV, SEGV_MAPERR);
return;
@@ -1081,16 +1061,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,

static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
- if ((error_code & PF_WRITE) && !pte_write(*pte))
+ if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
return 0;

- if ((error_code & PF_INSTR) && !pte_exec(*pte))
+ if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
return 0;
/*
* Note: We do not do lazy flushing on protection key
- * changes, so no spurious fault will ever set PF_PK.
+ * changes, so no spurious fault will ever set X86_PF_PK.
*/
- if ((error_code & PF_PK))
+ if ((error_code & X86_PF_PK))
return 1;

return 1;
@@ -1136,8 +1116,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
* change, so user accesses are not expected to cause spurious
* faults.
*/
- if (error_code != (PF_WRITE | PF_PROT)
- && error_code != (PF_INSTR | PF_PROT))
+ if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+ error_code != (X86_PF_INSTR | X86_PF_PROT))
return 0;

pgd = init_mm.pgd + pgd_index(address);
@@ -1197,19 +1177,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
* always an unconditional error and can never result in
* a follow-up action to resolve the fault, like a COW.
*/
- if (error_code & PF_PK)
+ if (error_code & X86_PF_PK)
return 1;

/*
* Make sure to check the VMA so that we do not perform
- * faults just to hit a PF_PK as soon as we fill in a
+ * faults just to hit a X86_PF_PK as soon as we fill in a
* page.
*/
- if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
- (error_code & PF_INSTR), foreign))
+ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+ (error_code & X86_PF_INSTR), foreign))
return 1;

- if (error_code & PF_WRITE) {
+ if (error_code & X86_PF_WRITE) {
/* write, present and write, not present: */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
@@ -1217,7 +1197,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
}

/* read, present: */
- if (unlikely(error_code & PF_PROT))
+ if (unlikely(error_code & X86_PF_PROT))
return 1;

/* read, not present: */
@@ -1240,7 +1220,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
if (!static_cpu_has(X86_FEATURE_SMAP))
return false;

- if (error_code & PF_USER)
+ if (error_code & X86_PF_USER)
return false;

if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1293,7 +1273,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* protection error (error_code & 9) == 0.
*/
if (unlikely(fault_in_kernel_space(address))) {
- if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+ if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
if (vmalloc_fault(address) >= 0)
return;

@@ -1321,7 +1301,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
if (unlikely(kprobes_fault(regs)))
return;

- if (unlikely(error_code & PF_RSVD))
+ if (unlikely(error_code & X86_PF_RSVD))
pgtable_bad(regs, error_code, address);

if (unlikely(smap_violation(error_code, regs))) {
@@ -1347,7 +1327,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
*/
if (user_mode(regs)) {
local_irq_enable();
- error_code |= PF_USER;
+ error_code |= X86_PF_USER;
flags |= FAULT_FLAG_USER;
} else {
if (regs->flags & X86_EFLAGS_IF)
@@ -1356,9 +1336,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,

perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

- if (error_code & PF_WRITE)
+ if (error_code & X86_PF_WRITE)
flags |= FAULT_FLAG_WRITE;
- if (error_code & PF_INSTR)
+ if (error_code & X86_PF_INSTR)
flags |= FAULT_FLAG_INSTRUCTION;

/*
@@ -1378,7 +1358,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* space check, thus avoiding the deadlock:
*/
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
- if ((error_code & PF_USER) == 0 &&
+ if (!(error_code & X86_PF_USER) &&
!search_exception_tables(regs->ip)) {
bad_area_nosemaphore(regs, error_code, address, NULL);
return;
@@ -1405,7 +1385,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
bad_area(regs, error_code, address);
return;
}
- if (error_code & PF_USER) {
+ if (error_code & X86_PF_USER) {
/*
* Accessing the stack below %sp is always a bug.
* The large cushion allows instructions like enter
--
2.14.2

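Aside: the relocated error-code bits are simple to exercise outside the kernel. A standalone sketch that decodes an error code using the bit meanings copied from the comment block in the patch above:

#include <stdio.h>

enum x86_pf_error_code {
    X86_PF_PROT  = 1 << 0,  /* 0: no page found      1: protection fault */
    X86_PF_WRITE = 1 << 1,  /* 0: read access        1: write access */
    X86_PF_USER  = 1 << 2,  /* 0: kernel-mode access 1: user-mode access */
    X86_PF_RSVD  = 1 << 3,  /* 1: use of reserved bit detected */
    X86_PF_INSTR = 1 << 4,  /* 1: fault was an instruction fetch */
    X86_PF_PK    = 1 << 5,  /* 1: protection keys block access */
};

static void decode(unsigned long ec)
{
    printf("%#04lx: %s %s in %s mode%s%s\n", ec,
           (ec & X86_PF_PROT)  ? "protection fault on" : "missing page for",
           (ec & X86_PF_INSTR) ? "fetch"
                               : ((ec & X86_PF_WRITE) ? "write" : "read"),
           (ec & X86_PF_USER)  ? "user" : "kernel",
           (ec & X86_PF_RSVD)  ? ", reserved bit set" : "",
           (ec & X86_PF_PK)    ? ", blocked by pkey"  : "");
}

int main(void)
{
    decode(X86_PF_USER | X86_PF_WRITE | X86_PF_PROT);  /* user write to a protected page */
    decode(X86_PF_INSTR | X86_PF_PROT);                /* kernel NX violation */
    return 0;
}
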
@ -1,103 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 27 Oct 2017 13:25:29 -0700
Subject: [PATCH] x86/boot: Relocate definition of the initial state of CR0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Both head_32.S and head_64.S utilize the same value to initialize the
control register CR0. Also, other parts of the kernel might want to access
this initial definition (e.g., emulation code for User-Mode Instruction
Prevention uses this state to provide a sane dummy value for CR0 when
emulating the smsw instruction). Thus, relocate this definition to a
header file from which it can be conveniently accessed.

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: ricardo.neri@intel.com
Cc: linux-mm@kvack.org
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-arch@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/1509135945-13762-3-git-send-email-ricardo.neri-calderon@linux.intel.com

(cherry picked from commit b0ce5b8c95c83a7b98c679b117e3d6ae6f97154b)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 27c31a88c22edab269abe17c0ac7db0351d26c5f)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/uapi/asm/processor-flags.h | 3 +++
arch/x86/kernel/head_32.S | 3 ---
arch/x86/kernel/head_64.S | 3 ---
3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 185f3d10c194..39946d0a1d41 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -151,5 +151,8 @@
#define CX86_ARR_BASE 0xc4
#define CX86_RCR_BASE 0xdc

+#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+ X86_CR0_PG)

#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 337a65377baf..7bbcdb1ea31a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -213,9 +213,6 @@ ENTRY(startup_32_smp)
#endif

.Ldefault_entry:
-#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
- X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
- X86_CR0_PG)
movl $(CR0_STATE & ~X86_CR0_PG),%eax
movl %eax,%cr0

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a2d8541b1da4..4117c1e0b3d2 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -137,9 +137,6 @@ ENTRY(secondary_startup_64)
1: wrmsr /* Make changes effective */

/* Setup cr0 */
-#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
- X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
- X86_CR0_PG)
movl $CR0_STATE, %eax
/* Make changes effective */
movq %rax, %cr0
--
2.14.2

@ -1,92 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 27 Oct 2017 13:25:30 -0700
Subject: [PATCH] ptrace,x86: Make user_64bit_mode() available to 32-bit builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

In its current form, user_64bit_mode() can only be used when CONFIG_X86_64
is selected. This implies that code built with CONFIG_X86_64=n cannot use
it. If a piece of code needs to be built for both CONFIG_X86_64=y and
CONFIG_X86_64=n and wants to use this function, it needs to wrap it in
an #ifdef/#endif; potentially, in multiple places.

This can be easily avoided with a single #ifdef/#endif pair within
user_64bit_mode() itself.

Suggested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: ricardo.neri@intel.com
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Adam Buchbinder <adam.buchbinder@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Garnier <thgarnie@google.com>
Link: https://lkml.kernel.org/r/1509135945-13762-4-git-send-email-ricardo.neri-calderon@linux.intel.com

(cherry picked from commit e27c310af5c05cf876d9cad006928076c27f54d4)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 20ddf08f867d3d96788299cd2fb7676590d64250)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/ptrace.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 2b5d686ea9f3..ea78a8438a8a 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -115,9 +115,9 @@ static inline int v8086_mode(struct pt_regs *regs)
#endif
}

-#ifdef CONFIG_X86_64
static inline bool user_64bit_mode(struct pt_regs *regs)
{
+#ifdef CONFIG_X86_64
#ifndef CONFIG_PARAVIRT
/*
* On non-paravirt systems, this is the only long mode CPL 3
@@ -128,8 +128,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
/* Headers are too twisted for this to go in paravirt.h. */
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
#endif
+#else /* !CONFIG_X86_64 */
+ return false;
+#endif
}

+#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
--
2.14.2

@ -1,74 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:58:58 -0700
Subject: [PATCH] x86/entry/64: Remove the restore_c_regs_and_iret label
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The only user was the 64-bit opportunistic SYSRET failure path, and
that path didn't really need it. This change makes the
opportunistic SYSRET code a bit more straightforward and gets rid of
the label.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/be3006a7ad3326e3458cf1cc55d416252cbe1986.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 9da78ba6b47b46428cfdfc0851511ab29c869798)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 629c8b858cbe72e88e7f44a8f10e1b434ab80721)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 34adfe0221d2..fac354ddf056 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -245,7 +245,6 @@ entry_SYSCALL64_slow_path:
call do_syscall_64 /* returns with IRQs disabled */

return_from_SYSCALL_64:
- RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */

/*
@@ -314,6 +313,7 @@ return_from_SYSCALL_64:
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
+ RESTORE_EXTRA_REGS
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
@@ -321,7 +321,7 @@ syscall_return_via_sysret:

opportunistic_sysret_failed:
SWAPGS
- jmp restore_c_regs_and_iret
+ jmp restore_regs_and_iret
END(entry_SYSCALL_64)

ENTRY(stub_ptregs_64)
@@ -638,7 +638,6 @@ retint_kernel:
*/
GLOBAL(restore_regs_and_iret)
RESTORE_EXTRA_REGS
-restore_c_regs_and_iret:
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
INTERRUPT_RETURN
--
2.14.2

@ -1,134 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:58:59 -0700
Subject: [PATCH] x86/entry/64: Split the IRET-to-user and IRET-to-kernel paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

These code paths will diverge soon.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/dccf8c7b3750199b4b30383c812d4e2931811509.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 64adfba0aeb668304d171c383ac80b22158ec128)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 34 +++++++++++++++++++++++++---------
arch/x86/entry/entry_64_compat.S | 2 +-
arch/x86/kernel/head_64.S | 2 +-
3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index fac354ddf056..e546441fbec3 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -321,7 +321,7 @@ syscall_return_via_sysret:

opportunistic_sysret_failed:
SWAPGS
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_usermode
END(entry_SYSCALL_64)

ENTRY(stub_ptregs_64)
@@ -423,7 +423,7 @@ ENTRY(ret_from_fork)
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
SWAPGS
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_usermode

1:
/* kernel thread */
@@ -612,7 +612,20 @@ GLOBAL(retint_user)
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
SWAPGS
- jmp restore_regs_and_iret
+
+GLOBAL(restore_regs_and_return_to_usermode)
+#ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates user mode. */
+ testl $3, CS(%rsp)
+ jnz 1f
+ ud2
+1:
+#endif
+ RESTORE_EXTRA_REGS
+ RESTORE_C_REGS
+ REMOVE_PT_GPREGS_FROM_STACK 8
+ INTERRUPT_RETURN
+

/* Returning to kernel space */
retint_kernel:
@@ -632,11 +645,14 @@ retint_kernel:
*/
TRACE_IRQS_IRETQ

-/*
- * At this label, code paths which return to kernel and to user,
- * which come from interrupts/exception and from syscalls, merge.
- */
-GLOBAL(restore_regs_and_iret)
+GLOBAL(restore_regs_and_return_to_kernel)
+#ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates kernel mode. */
+ testl $3, CS(%rsp)
+ jz 1f
+ ud2
+1:
+#endif
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1340,7 +1356,7 @@ ENTRY(nmi)
* work, because we don't want to enable interrupts.
*/
SWAPGS
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_usermode

.Lnmi_from_kernel:
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d8468ba24be0..2b3a88feaa2b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -337,7 +337,7 @@ ENTRY(entry_INT80_compat)
/* Go back to user mode. */
TRACE_IRQS_ON
SWAPGS
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_usermode
END(entry_INT80_compat)

ALIGN
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 4117c1e0b3d2..e785734980ad 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -311,7 +311,7 @@ early_idt_handler_common:

20:
decl early_recursion_flag(%rip)
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_kernel
END(early_idt_handler_common)

__INITDATA
--
2.14.2

@ -1,156 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:00 -0700
Subject: [PATCH] x86/entry/64: Move SWAPGS into the common IRET-to-usermode
path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

All of the code paths that ended up doing IRET to usermode did
SWAPGS immediately beforehand. Move the SWAPGS into the common
code.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/27fd6f45b7cd640de38fb9066fd0349bcd11f8e1.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 8a055d7f411d41755ce30db5bb65b154777c4b78)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 62a85594f9be3baeb2495089f1c2980bc497d03b)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 32 ++++++++++++++------------------
arch/x86/entry/entry_64_compat.S | 3 +--
2 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index e546441fbec3..7c8258e3ad2d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -249,12 +249,14 @@ return_from_SYSCALL_64:

/*
* Try to use SYSRET instead of IRET if we're returning to
- * a completely clean 64-bit userspace context.
+ * a completely clean 64-bit userspace context. If we're not,
+ * go to the slow exit path.
*/
movq RCX(%rsp), %rcx
movq RIP(%rsp), %r11
- cmpq %rcx, %r11 /* RCX == RIP */
- jne opportunistic_sysret_failed
+
+ cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
+ jne swapgs_restore_regs_and_return_to_usermode

/*
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -272,14 +274,14 @@ return_from_SYSCALL_64:

/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode

cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode

movq R11(%rsp), %r11
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode

/*
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
@@ -300,12 +302,12 @@ return_from_SYSCALL_64:
* would never get past 'stuck_here'.
*/
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz opportunistic_sysret_failed
+ jnz swapgs_restore_regs_and_return_to_usermode

/* nothing to check for RSP */

cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode

/*
* We win! This label is here just for ease of understanding
@@ -318,10 +320,6 @@ syscall_return_via_sysret:
movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
USERGS_SYSRET64
-
-opportunistic_sysret_failed:
- SWAPGS
- jmp restore_regs_and_return_to_usermode
END(entry_SYSCALL_64)

ENTRY(stub_ptregs_64)
@@ -422,8 +420,7 @@ ENTRY(ret_from_fork)
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode

1:
/* kernel thread */
@@ -611,9 +608,8 @@ GLOBAL(retint_user)
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
- SWAPGS

-GLOBAL(restore_regs_and_return_to_usermode)
+GLOBAL(swapgs_restore_regs_and_return_to_usermode)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testl $3, CS(%rsp)
@@ -621,6 +617,7 @@ GLOBAL(restore_regs_and_return_to_usermode)
ud2
1:
#endif
+ SWAPGS
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1355,8 +1352,7 @@ ENTRY(nmi)
* Return back to user mode. We must *not* do the normal exit
* work, because we don't want to enable interrupts.
*/
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode

.Lnmi_from_kernel:
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 2b3a88feaa2b..be745b7a3e3e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -336,8 +336,7 @@ ENTRY(entry_INT80_compat)

/* Go back to user mode. */
TRACE_IRQS_ON
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)

ALIGN
--
2.14.2

@ -1,103 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:01 -0700
Subject: [PATCH] x86/entry/64: Simplify reg restore code in the standard IRET
paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The old code restored all the registers with movq instead of pop.

In theory, this was done because some CPUs have higher movq
throughput, but any gain there would be tiny and is almost certainly
outweighed by the higher text size.

This saves 96 bytes of text.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/ad82520a207ccd851b04ba613f4f752b33ac05f7.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit e872045bfd9c465a8555bab4b8567d56a4d2d3bb)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit f926575cd370de4052e89477582b349af5664a56)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/calling.h | 21 +++++++++++++++++++++
arch/x86/entry/entry_64.S | 12 ++++++------
2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 640aafebdc00..0b9dd8123701 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -151,6 +151,27 @@ For 32-bit we have the following conventions - kernel is built with
UNWIND_HINT_REGS offset=\offset extra=0
.endm

+ .macro POP_EXTRA_REGS
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ .endm
+
+ .macro POP_C_REGS
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ .endm
+
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
movq 6*8(%rsp), %r11
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 7c8258e3ad2d..a1a86e782a0e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -618,9 +618,9 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
1:
#endif
SWAPGS
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
+ POP_EXTRA_REGS
+ POP_C_REGS
+ addq $8, %rsp /* skip regs->orig_ax */
INTERRUPT_RETURN


@@ -650,9 +650,9 @@ GLOBAL(restore_regs_and_return_to_kernel)
ud2
1:
#endif
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
+ POP_EXTRA_REGS
+ POP_C_REGS
+ addq $8, %rsp /* skip regs->orig_ax */
INTERRUPT_RETURN

ENTRY(native_iret)
--
2.14.2

@ -1,70 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:02 -0700
Subject: [PATCH] x86/entry/64: Shrink paranoid_exit_restore and make labels
local
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

paranoid_exit_restore was a copy of restore_regs_and_return_to_kernel.
Merge them and make the paranoid_exit internal labels local.

Keeping .Lparanoid_exit makes the code a bit shorter because it
allows a 2-byte jnz instead of a 5-byte jnz.

Saves 96 bytes of text.

( This is still a bit suboptimal in a non-CONFIG_TRACE_IRQFLAGS
kernel, but fixing that would make the code rather messy. )

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/510d66a1895cda9473c84b1086f0bb974f22de6a.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit e53178328c9b96fbdbc719e78c93b5687ee007c3)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit fb53fe10add935c3d0eb63199e43426eaf3b4299)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a1a86e782a0e..6995f7e08aa1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1136,17 +1136,14 @@ ENTRY(paranoid_exit)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
testl %ebx, %ebx /* swapgs needed? */
- jnz paranoid_exit_no_swapgs
+ jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
SWAPGS_UNSAFE_STACK
- jmp paranoid_exit_restore
-paranoid_exit_no_swapgs:
+ jmp .Lparanoid_exit_restore
+.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
- INTERRUPT_RETURN
+.Lparanoid_exit_restore:
+ jmp restore_regs_and_return_to_kernel
END(paranoid_exit)

/*
--
2.14.2

@ -1,61 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:03 -0700
Subject: [PATCH] x86/entry/64: Use pop instead of movq in
syscall_return_via_sysret
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Saves 64 bytes.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/6609b7f74ab31c36604ad746e019ea8495aec76c.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 4fbb39108f972437c44e5ffa781b56635d496826)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 1e9a9d5ef9f65eeb26eb8f0974dd3e693894baf1)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6995f7e08aa1..33a416c7df2d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -315,10 +315,18 @@ return_from_SYSCALL_64:
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
+ POP_EXTRA_REGS
+ popq %rsi /* skip r11 */
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rsi /* skip rcx */
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET64
END(entry_SYSCALL_64)

--
2.14.2

@ -1,60 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:04 -0700
Subject: [PATCH] x86/entry/64: Merge the fast and slow SYSRET paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

They did almost the same thing. Remove a bunch of pointless
instructions (mostly hidden in macros) and reduce cognitive load by
merging them.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1204e20233fcab9130a1ba80b3b1879b5db3fc1f.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit a512210643da8082cb44181dba8b18e752bd68f0)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 7c4575d8bb2d01960ba9b9840fa22460e0179eca)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 33a416c7df2d..87be1cd1fa88 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -220,10 +220,9 @@ entry_SYSCALL_64_fastpath:
TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
+ addq $6*8, %rsp /* skip extra regs -- they were preserved */
UNWIND_HINT_EMPTY
- USERGS_SYSRET64
+ jmp .Lpop_c_regs_except_rcx_r11_and_sysret

1:
/*
@@ -317,6 +316,7 @@ syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
POP_EXTRA_REGS
+.Lpop_c_regs_except_rcx_r11_and_sysret:
popq %rsi /* skip r11 */
popq %r10
popq %r9
--
2.14.2

@ -1,57 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:05 -0700
Subject: [PATCH] x86/entry/64: Use POP instead of MOV to restore regs on NMI
return
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This gets rid of the last user of the old RESTORE_..._REGS infrastructure.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/652a260f17a160789bc6a41d997f98249b73e2ab.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 471ee4832209e986029b9fabdaad57b1eecb856b)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 3c5771a43d8f00e53081871027fea891a091ff5e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 87be1cd1fa88..4eff3aca54ed 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1572,11 +1572,14 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
+ POP_EXTRA_REGS
+ POP_C_REGS

- /* Point RSP at the "iret" frame. */
- REMOVE_PT_GPREGS_FROM_STACK 6*8
+ /*
+ * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
+ * frame.
+ */
+ addq $6*8, %rsp

/*
* Clear "NMI executing". Set DF first so that we can easily
--
2.14.2

@ -1,104 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:06 -0700
Subject: [PATCH] x86/entry/64: Remove the RESTORE_..._REGS infrastructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

All users of RESTORE_EXTRA_REGS, RESTORE_C_REGS and such, and
REMOVE_PT_GPREGS_FROM_STACK are gone. Delete the macros.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c32672f6e47c561893316d48e06c7656b1039a36.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit c39858de696f0cc160a544455e8403d663d577e9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d248c62028c5467cd5a5ce06d344e3fb330da3ec)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/calling.h | 52 ------------------------------------------------
1 file changed, 52 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 0b9dd8123701..1895a685d3dd 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -141,16 +141,6 @@ For 32-bit we have the following conventions - kernel is built with
UNWIND_HINT_REGS offset=\offset
.endm

- .macro RESTORE_EXTRA_REGS offset=0
- movq 0*8+\offset(%rsp), %r15
- movq 1*8+\offset(%rsp), %r14
- movq 2*8+\offset(%rsp), %r13
- movq 3*8+\offset(%rsp), %r12
- movq 4*8+\offset(%rsp), %rbp
- movq 5*8+\offset(%rsp), %rbx
- UNWIND_HINT_REGS offset=\offset extra=0
- .endm
-
.macro POP_EXTRA_REGS
popq %r15
popq %r14
@@ -172,48 +162,6 @@ For 32-bit we have the following conventions - kernel is built with
popq %rdi
.endm

- .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
- .if \rstor_r11
- movq 6*8(%rsp), %r11
- .endif
- .if \rstor_r8910
- movq 7*8(%rsp), %r10
- movq 8*8(%rsp), %r9
- movq 9*8(%rsp), %r8
- .endif
- .if \rstor_rax
- movq 10*8(%rsp), %rax
- .endif
- .if \rstor_rcx
- movq 11*8(%rsp), %rcx
- .endif
- .if \rstor_rdx
- movq 12*8(%rsp), %rdx
- .endif
- movq 13*8(%rsp), %rsi
- movq 14*8(%rsp), %rdi
- UNWIND_HINT_IRET_REGS offset=16*8
- .endm
- .macro RESTORE_C_REGS
- RESTORE_C_REGS_HELPER 1,1,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RAX
- RESTORE_C_REGS_HELPER 0,1,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RCX
- RESTORE_C_REGS_HELPER 1,0,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_R11
- RESTORE_C_REGS_HELPER 1,1,0,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RCX_R11
- RESTORE_C_REGS_HELPER 1,0,0,1,1
- .endm
-
- .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
- subq $-(15*8+\addskip), %rsp
- .endm
-
.macro icebp
.byte 0xf1
.endm
--
2.14.2

@ -1,105 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 2 Nov 2017 00:59:07 -0700
Subject: [PATCH] xen, x86/entry/64: Add xen NMI trap entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Instead of trying to execute any NMI via the bare metal's NMI trap
handler use a Xen specific one for PV domains, like we do for e.g.
debug traps. As in a PV domain the NMI is handled via the normal
kernel stack this is the correct thing to do.

This will enable us to get rid of the very fragile and questionable
dependencies between the bare metal NMI handler and Xen assumptions
believed to be broken anyway.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/5baf5c0528d58402441550c5770b98e7961e7680.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 43e4111086a70c78bedb6ad990bee97f17b27a6e)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 20c970e03b42141abf6c45938ce6d4fdc3555921)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/traps.h | 2 +-
arch/x86/xen/enlighten_pv.c | 2 +-
arch/x86/entry/entry_64.S | 2 +-
arch/x86/xen/xen-asm_64.S | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 8e5bf86f87e5..b052a7621ca1 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -55,9 +55,9 @@ asmlinkage void simd_coprocessor_error(void);

#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xennmi(void);
asmlinkage void xen_xendebug(void);
asmlinkage void xen_xenint3(void);
-asmlinkage void xen_nmi(void);
asmlinkage void xen_overflow(void);
asmlinkage void xen_bounds(void);
asmlinkage void xen_invalid_op(void);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 69b9deff7e5c..8da4eff19c2a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -600,7 +600,7 @@ static struct trap_array_entry trap_array[] = {
#ifdef CONFIG_X86_MCE
{ machine_check, xen_machine_check, true },
#endif
- { nmi, xen_nmi, true },
+ { nmi, xen_xennmi, true },
{ overflow, xen_overflow, false },
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4eff3aca54ed..5a6aba7cf3bd 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1091,6 +1091,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1

#ifdef CONFIG_XEN
+idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0
idtentry xenint3 do_int3 has_error_code=0
#endif
@@ -1253,7 +1254,6 @@ ENTRY(error_exit)
END(error_exit)

/* Runs on exception stack */
-/* XXX: broken on Xen PV */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
/*
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index dae2cc33afb5..286ecc198562 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -29,7 +29,7 @@ xen_pv_trap debug
xen_pv_trap xendebug
xen_pv_trap int3
xen_pv_trap xenint3
-xen_pv_trap nmi
+xen_pv_trap xennmi
xen_pv_trap overflow
xen_pv_trap bounds
xen_pv_trap invalid_op
--
2.14.2

@ -1,117 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:08 -0700
Subject: [PATCH] x86/entry/64: De-Xen-ify our NMI code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Xen PV is fundamentally incompatible with our fancy NMI code: it
doesn't use IST at all, and Xen entries clobber two stack slots
below the hardware frame.

Drop Xen PV support from our NMI code entirely.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/bfbe711b5ae03f672f8848999a8eb2711efc7f98.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 929bacec21478a72c78e4f29f98fb799bd00105a)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit ffc372909c1701c4fdd2bde7861692573ef381a7)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5a6aba7cf3bd..05501c781c20 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1253,9 +1253,13 @@ ENTRY(error_exit)
jmp retint_user
END(error_exit)

-/* Runs on exception stack */
+/*
+ * Runs on exception stack. Xen PV does not go through this path at all,
+ * so we can use real assembly here.
+ */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
+
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
@@ -1313,7 +1317,7 @@ ENTRY(nmi)
* stacks lest we corrupt the "NMI executing" variable.
*/

- SWAPGS_UNSAFE_STACK
+ swapgs
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1478,7 +1482,7 @@ nested_nmi_out:
popq %rdx

/* We are returning to kernel mode, so this cannot result in a fault. */
- INTERRUPT_RETURN
+ iretq

first_nmi:
/* Restore rdx. */
@@ -1509,7 +1513,7 @@ first_nmi:
pushfq /* RFLAGS */
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
- INTERRUPT_RETURN /* continues at repeat_nmi below */
+ iretq /* continues at repeat_nmi below */
UNWIND_HINT_IRET_REGS
1:
#endif
@@ -1584,20 +1588,22 @@ nmi_restore:
/*
* Clear "NMI executing". Set DF first so that we can easily
* distinguish the remaining code between here and IRET from
- * the SYSCALL entry and exit paths. On a native kernel, we
- * could just inspect RIP, but, on paravirt kernels,
- * INTERRUPT_RETURN can translate into a jump into a
- * hypercall page.
+ * the SYSCALL entry and exit paths.
+ *
+ * We arguably should just inspect RIP instead, but I (Andy) wrote
+ * this code when I had the misapprehension that Xen PV supported
+ * NMIs, and Xen PV would break that approach.
*/
std
movq $0, 5*8(%rsp) /* clear "NMI executing" */

/*
- * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
- * stack in a single instruction. We are returning to kernel
- * mode, so this cannot result in a fault.
+ * iretq reads the "iret" frame and exits the NMI stack in a
+ * single instruction. We are returning to kernel mode, so this
+ * cannot result in a fault. Similarly, we don't need to worry
+ * about espfix64 on the way back to kernel mode.
*/
- INTERRUPT_RETURN
+ iretq
END(nmi)

ENTRY(ignore_sysret)
--
2.14.2

@ -1,145 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:09 -0700
Subject: [PATCH] x86/entry/32: Pull the MSR_IA32_SYSENTER_CS update code out
of native_load_sp0()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This causes the MSR_IA32_SYSENTER_CS write to move out of the
paravirt callback. This shouldn't affect Xen PV: Xen already ignores
MSR_IA32_SYSENTER_ESP writes. In any event, Xen doesn't support
vm86() in a useful way.

Note to any potential backporters: This patch won't break lguest, as
lguest didn't have any SYSENTER support at all.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/75cf09fe03ae778532d0ca6c65aa58e66bc2f90c.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit bd7dc5a6afac719d8ce4092391eef2c7e83c2a75)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 779e32d0da9a547f3b11fbecac8287e458ba67f5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/processor.h | 7 -------
arch/x86/include/asm/switch_to.h | 12 ++++++++++++
arch/x86/kernel/process_32.c | 4 +++-
arch/x86/kernel/process_64.c | 2 +-
arch/x86/kernel/vm86_32.c | 6 +++++-
5 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 028245e1c42b..ee37fb86900a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -513,13 +513,6 @@ static inline void
native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
{
tss->x86_tss.sp0 = thread->sp0;
-#ifdef CONFIG_X86_32
- /* Only happens when SEP is enabled, no need to test "SEP"arately: */
- if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
- tss->x86_tss.ss1 = thread->sysenter_cs;
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
- }
-#endif
}

static inline void native_swapgs(void)
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index fcc5cd387fd1..7ae8caffbada 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -72,4 +72,16 @@ do { \
((last) = __switch_to_asm((prev), (next))); \
} while (0)

+#ifdef CONFIG_X86_32
+static inline void refresh_sysenter_cs(struct thread_struct *thread)
+{
+ /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+ if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+ return;
+
+ this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+}
+#endif
+
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 22802162eeb9..2e42b66b8ca4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

/*
* Reload esp0 and cpu_current_top_of_stack. This changes
- * current_thread_info().
+ * current_thread_info(). Refresh the SYSENTER configuration in
+ * case prev or next is vm86.
*/
load_sp0(tss, next);
+ refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 1e7701c4cd80..565daaa6f18d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
this_cpu_write(current_task, next_p);

- /* Reload esp0 and ss1. This changes current_thread_info(). */
+ /* Reload sp0. */
load_sp0(tss, next);

/*
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 7924a5356c8a..5bc1c3ab6287 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -54,6 +54,7 @@
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>
+#include <asm/switch_to.h>

/*
* Known problems:
@@ -149,6 +150,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
load_sp0(tss, &tsk->thread);
+ refresh_sysenter_cs(&tsk->thread);
vm86->saved_sp0 = 0;
put_cpu();

@@ -368,8 +370,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
/* make room for real-mode segments */
tsk->thread.sp0 += 16;

- if (static_cpu_has(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP)) {
tsk->thread.sysenter_cs = 0;
+ refresh_sysenter_cs(&tsk->thread);
+ }

load_sp0(tss, &tsk->thread);
put_cpu();
--
2.14.2

@ -1,238 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Andy Lutomirski <luto@kernel.org>
|
||||
Date: Thu, 2 Nov 2017 00:59:10 -0700
|
||||
Subject: [PATCH] x86/entry/64: Pass SP0 directly to load_sp0()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
load_sp0() had an odd signature:
|
||||
|
||||
void load_sp0(struct tss_struct *tss, struct thread_struct *thread);
|
||||
|
||||
Simplify it to:
|
||||
|
||||
void load_sp0(unsigned long sp0);
|
||||
|
||||
Also simplify a few get_cpu()/put_cpu() sequences to
|
||||
preempt_disable()/preempt_enable().
|
||||
|
||||
Signed-off-by: Andy Lutomirski <luto@kernel.org>
|
||||
Reviewed-by: Borislav Petkov <bp@suse.de>
|
||||
Cc: Borislav Petkov <bpetkov@suse.de>
|
||||
Cc: Brian Gerst <brgerst@gmail.com>
|
||||
Cc: Dave Hansen <dave.hansen@intel.com>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/r/2655d8b42ed940aa384fe18ee1129bbbcf730a08.1509609304.git.luto@kernel.org
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
(cherry picked from commit da51da189a24bb9b7e2d5a123be096e51a4695a5)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 41f6a89b0be4d052a6af59df5e56102d4e4c79ef)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/include/asm/paravirt.h | 5 ++---
|
||||
arch/x86/include/asm/paravirt_types.h | 2 +-
|
||||
arch/x86/include/asm/processor.h | 9 ++++-----
|
||||
arch/x86/kernel/cpu/common.c | 4 ++--
|
||||
arch/x86/kernel/process_32.c | 2 +-
|
||||
arch/x86/kernel/process_64.c | 2 +-
|
||||
arch/x86/kernel/vm86_32.c | 14 ++++++--------
|
||||
arch/x86/xen/enlighten_pv.c | 7 +++----
|
||||
8 files changed, 20 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
|
||||
index 12deec722cf0..43d4f90edebc 100644
|
||||
--- a/arch/x86/include/asm/paravirt.h
|
||||
+++ b/arch/x86/include/asm/paravirt.h
|
||||
@@ -15,10 +15,9 @@
|
||||
#include <linux/cpumask.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
-static inline void load_sp0(struct tss_struct *tss,
|
||||
- struct thread_struct *thread)
|
||||
+static inline void load_sp0(unsigned long sp0)
|
||||
{
|
||||
- PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
|
||||
+ PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
|
||||
}

/* The paravirtualized CPUID instruction. */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 42873edd9f9d..e3953a1e2b57 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -133,7 +133,7 @@ struct pv_cpu_ops {
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);

- void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
+ void (*load_sp0)(unsigned long sp0);

void (*set_iopl_mask)(unsigned mask);

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ee37fb86900a..85ddfc1a9bb5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -510,9 +510,9 @@ static inline void native_set_iopl_mask(unsigned mask)
}

static inline void
-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+native_load_sp0(unsigned long sp0)
{
- tss->x86_tss.sp0 = thread->sp0;
+ this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}

static inline void native_swapgs(void)
@@ -537,10 +537,9 @@ static inline unsigned long current_top_of_stack(void)
#else
#define __cpuid native_cpuid

-static inline void load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
{
- native_load_sp0(tss, thread);
+ native_load_sp0(sp0);
}

#define set_iopl_mask native_set_iopl_mask
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef7b1ba56363..6562acbfc4e0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1570,7 +1570,7 @@ void cpu_init(void)
BUG_ON(me->mm);
enter_lazy_tlb(&init_mm, me);

- load_sp0(t, &current->thread);
+ load_sp0(current->thread.sp0);
set_tss_desc(cpu, t);
load_TR_desc();
load_mm_ldt(&init_mm);
@@ -1624,7 +1624,7 @@ void cpu_init(void)
BUG_ON(curr->mm);
enter_lazy_tlb(&init_mm, curr);

- load_sp0(t, thread);
+ load_sp0(thread->sp0);
set_tss_desc(cpu, t);
load_TR_desc();
load_mm_ldt(&init_mm);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2e42b66b8ca4..48a3f240f565 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* current_thread_info(). Refresh the SYSENTER configuration in
* case prev or next is vm86.
*/
- load_sp0(tss, next);
+ load_sp0(next->sp0);
refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 565daaa6f18d..37b933628a8b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -466,7 +466,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(current_task, next_p);

/* Reload sp0. */
- load_sp0(tss, next);
+ load_sp0(next->sp0);

/*
* Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5bc1c3ab6287..0f1d92cd20ad 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -94,7 +94,6 @@

void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86plus_struct __user *user;
struct vm86 *vm86 = current->thread.vm86;
@@ -146,13 +145,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
do_exit(SIGSEGV);
}

- tss = &per_cpu(cpu_tss, get_cpu());
+ preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
- load_sp0(tss, &tsk->thread);
+ load_sp0(tsk->thread.sp0);
refresh_sysenter_cs(&tsk->thread);
vm86->saved_sp0 = 0;
- put_cpu();
+ preempt_enable();

memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));

@@ -238,7 +237,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)

static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86 *vm86 = tsk->thread.vm86;
struct kernel_vm86_regs vm86regs;
@@ -366,8 +364,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
vm86->saved_sp0 = tsk->thread.sp0;
lazy_save_gs(vm86->regs32.gs);

- tss = &per_cpu(cpu_tss, get_cpu());
/* make room for real-mode segments */
+ preempt_disable();
tsk->thread.sp0 += 16;

if (static_cpu_has(X86_FEATURE_SEP)) {
@@ -375,8 +373,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
refresh_sysenter_cs(&tsk->thread);
}

- load_sp0(tss, &tsk->thread);
- put_cpu();
+ load_sp0(tsk->thread.sp0);
+ preempt_enable();

if (vm86->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 8da4eff19c2a..e7b213047724 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -810,15 +810,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
}
}

-static void xen_load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static void xen_load_sp0(unsigned long sp0)
{
struct multicall_space mcs;

mcs = xen_mc_entry(0);
- MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
+ MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
- tss->x86_tss.sp0 = thread->sp0;
+ this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}

void xen_set_iopl_mask(unsigned mask)
--
2.14.2

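The conversion above boils down to passing one value instead of two structs. A minimal userspace sketch of the new shape; cpu_tss and the struct names here are local stand-ins for the kernel's per-CPU TSS, not the real implementation:

/* Minimal model, not kernel code: cpu_tss stands in for the per-CPU TSS. */
#include <stdio.h>

struct x86_tss_seg { unsigned long sp0; };
struct tss { struct x86_tss_seg x86_tss; };

static struct tss cpu_tss;	/* models this_cpu_write(cpu_tss.x86_tss.sp0, ...) */

static void native_load_sp0(unsigned long sp0)
{
	/* new convention: callers pass the value itself, not two structs */
	cpu_tss.x86_tss.sp0 = sp0;
}

int main(void)
{
	native_load_sp0(0xffffc000UL);
	printf("TSS.sp0 = %#lx\n", cpu_tss.x86_tss.sp0);
	return 0;
}
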
@ -1,48 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:11 -0700
Subject: [PATCH] x86/entry: Add task_top_of_stack() to find the top of a
task's stack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This will let us get rid of a few places that hardcode accesses to
thread.sp0.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/b49b3f95a8ff858c40c9b0f5b32be0355324327d.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 3500130b84a3cdc5b6796eba1daf178944935efe)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit f1078e10e361afaeb22ee72c54d5ad397e19728d)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/processor.h | 2 ++
1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 85ddfc1a9bb5..f83fbf1b6dd9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -788,6 +788,8 @@ static inline void spin_lock_prefetch(const void *x)
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)

+#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
+
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
--
2.14.2

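task_top_of_stack() is pure pointer arithmetic: the top of a task's stack is one struct pt_regs past task_pt_regs(). A self-contained sketch of that arithmetic over a raw stack base; THREAD_SIZE, the padding value, and the pt_regs layout are assumed, not taken from a real kernel build:

#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE (16 * 1024)		/* assumed stack size */
#define TOP_OF_KERNEL_STACK_PADDING 8	/* 8 on x86_32, 0 on x86_64 */

struct pt_regs { unsigned long regs[21]; };

/* same arithmetic as the kernel macros, but taking a raw stack base */
#define task_pt_regs(base) \
	(((struct pt_regs *)((base) + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING)) - 1)
#define task_top_of_stack(base)	((unsigned long)(task_pt_regs(base) + 1))

int main(void)
{
	static unsigned char stack[THREAD_SIZE];
	uintptr_t base = (uintptr_t)stack;

	/* pt_regs sits at the very top, just below the padding */
	printf("stack base   %#lx\n", (unsigned long)base);
	printf("task_pt_regs %#lx\n", (unsigned long)(uintptr_t)task_pt_regs(base));
	printf("top of stack %#lx\n", task_top_of_stack(base));
	return 0;
}
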
@ -1,99 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:12 -0700
Subject: [PATCH] x86/xen/64, x86/entry/64: Clean up SP code in
cpu_initialize_context()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

I'm removing thread_struct::sp0, and Xen's usage of it is slightly
dubious and unnecessary. Use appropriate helpers instead.

While we're at it, reorder the code slightly to make it more obvious
what's going on.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/d5b9a3da2b47c68325bd2bbe8f82d9554dee0d0f.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit f16b3da1dc936c0f8121741d0a1731bf242f2f56)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 27c60a1f6c49062151f67042458a523386cc3dc5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/xen/smp_pv.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 51471408fdd1..8c0e047d0b80 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -13,6 +13,7 @@
* single-threaded.
*/
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
@@ -293,12 +294,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
#endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

+ /*
+ * Bring up the CPU in cpu_bringup_and_idle() with the stack
+ * pointing just below where pt_regs would be if it were a normal
+ * kernel entry.
+ */
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
+ ctxt->user_regs.cs = __KERNEL_CS;
+ ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);

xen_copy_trap_info(ctxt->trap_ctxt);

@@ -313,8 +321,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->gdt_frames[0] = gdt_mfn;
ctxt->gdt_ents = GDT_ENTRIES;

+ /*
+ * Set SS:SP that Xen will use when entering guest kernel mode
+ * from guest user mode. Subsequent calls to load_sp0() can
+ * change this value.
+ */
ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
+ ctxt->kernel_sp = task_top_of_stack(idle);

#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
@@ -326,10 +339,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
- ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);

- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
BUG();
--
2.14.2

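The esp/kernel_sp rewrite above is behavior-preserving: with sp0 equal to the top of the stack, idle->thread.sp0 - sizeof(struct pt_regs) and task_pt_regs(idle) name the same address. A quick check of that identity with stand-in values, not kernel code:

#include <assert.h>
#include <stdio.h>

struct pt_regs { unsigned long regs[21]; };

int main(void)
{
	unsigned long top = 0x100000;	/* stand-in for task_top_of_stack(idle) */

	/* old: idle->thread.sp0 - sizeof(struct pt_regs), with sp0 == top */
	unsigned long old_esp = top - sizeof(struct pt_regs);
	/* new: (unsigned long)task_pt_regs(idle) */
	unsigned long new_esp = (unsigned long)(((struct pt_regs *)top) - 1);

	assert(old_esp == new_esp);
	printf("esp = %#lx, kernel_sp = %#lx\n", new_esp, top);
	return 0;
}
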
@ -1,102 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:13 -0700
Subject: [PATCH] x86/entry/64: Stop initializing TSS.sp0 at boot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

In my quest to get rid of thread_struct::sp0, I want to clean up or
remove all of its readers. Two of them are in cpu_init() (32-bit and
64-bit), and they aren't needed. This is because we never enter
userspace at all on the threads that CPUs are initialized in.

Poison the initial TSS.sp0 and stop initializing it on CPU init.

The comment text mostly comes from Dave Hansen. Thanks!

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/ee4a00540ad28c6cff475fbcc7769a4460acc861.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 20bb83443ea79087b5e5f8dab4e9d80bb9bf7acb)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8c6b12e88bd87433087ea1f1cd5a9a4975e4623c)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/common.c | 13 ++++++++++---
arch/x86/kernel/process.c | 8 +++++++-
2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6562acbfc4e0..121fe3570d6f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1570,9 +1570,13 @@ void cpu_init(void)
BUG_ON(me->mm);
enter_lazy_tlb(&init_mm, me);

- load_sp0(current->thread.sp0);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
set_tss_desc(cpu, t);
load_TR_desc();
+
load_mm_ldt(&init_mm);

clear_all_debug_regs();
@@ -1594,7 +1598,6 @@ void cpu_init(void)
int cpu = smp_processor_id();
struct task_struct *curr = current;
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
- struct thread_struct *thread = &curr->thread;

wait_for_master_cpu(cpu);

@@ -1624,9 +1627,13 @@ void cpu_init(void)
BUG_ON(curr->mm);
enter_lazy_tlb(&init_mm, curr);

- load_sp0(thread->sp0);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
set_tss_desc(cpu, t);
load_TR_desc();
+
load_mm_ldt(&init_mm);

t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca198080ea9..ccf3a4f4ef68 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -48,7 +48,13 @@
*/
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.x86_tss = {
- .sp0 = TOP_OF_INIT_STACK,
+ /*
+ * .sp0 is only used when entering ring 0 from a lower
+ * privilege level. Since the init task never runs anything
+ * but ring 0 code, there is no need for a valid value here.
+ * Poison it.
+ */
+ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
--
2.14.2

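The poison value is picked so that any stray use of the init task's TSS.sp0 faults immediately: setting the sign bit makes it noncanonical on x86_64, and the +1 also leaves it misaligned. A one-liner to see the value, assuming a 64-bit long:

#include <stdio.h>

#define BITS_PER_LONG ((int)(8 * sizeof(long)))

int main(void)
{
	/* sign bit set, plus one: noncanonical on x86_64 and misaligned,
	 * so any use of the poisoned sp0 faults loudly */
	unsigned long poison = (1UL << (BITS_PER_LONG - 1)) + 1;

	printf("TSS.sp0 poison = %#lx\n", poison);
	return 0;
}
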
@ -1,103 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:14 -0700
Subject: [PATCH] x86/entry/64: Remove all remaining direct thread_struct::sp0
reads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The only remaining readers are in context switch code or vm86(), and
they all just want to update TSS.sp0 to match the current task.
Replace them all with a new helper update_sp0().

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2d231687f4ff288c9d9e98d7861b7df374246ac3.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 46f5a10a721ce8dce8cc8fe55279b49e1c6b3288)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit cc87284caa7d31d9d5a55c418eb5278cab6e2db1)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/switch_to.h | 6 ++++++
arch/x86/kernel/process_32.c | 2 +-
arch/x86/kernel/process_64.c | 2 +-
arch/x86/kernel/vm86_32.c | 4 ++--
4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 7ae8caffbada..54e64d909725 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -84,4 +84,10 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
}
#endif

+/* This is used when switching tasks or entering/exiting vm86 mode. */
+static inline void update_sp0(struct task_struct *task)
+{
+ load_sp0(task->thread.sp0);
+}
+
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 48a3f240f565..c0d60420466c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* current_thread_info(). Refresh the SYSENTER configuration in
* case prev or next is vm86.
*/
- load_sp0(next->sp0);
+ update_sp0(next_p);
refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 37b933628a8b..8a748e17bf6e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -466,7 +466,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(current_task, next_p);

/* Reload sp0. */
- load_sp0(next->sp0);
+ update_sp0(next_p);

/*
* Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 0f1d92cd20ad..a7b44c75c642 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -148,7 +148,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
- load_sp0(tsk->thread.sp0);
+ update_sp0(tsk);
refresh_sysenter_cs(&tsk->thread);
vm86->saved_sp0 = 0;
preempt_enable();
@@ -373,7 +373,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
refresh_sysenter_cs(&tsk->thread);
}

- load_sp0(tsk->thread.sp0);
+ update_sp0(tsk);
preempt_enable();

if (vm86->flags & VM86_SCREEN_BITMAP)
--
2.14.2

@ -1,51 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:15 -0700
Subject: [PATCH] x86/entry/32: Fix cpu_current_top_of_stack initialization at
boot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

cpu_current_top_of_stack's initialization forgot about
TOP_OF_KERNEL_STACK_PADDING. This bug didn't matter because the
idle threads never enter user mode.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/e5e370a7e6e4fddd1c4e4cf619765d96bb874b21.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit cd493a6deb8b78eca280d05f7fa73fd69403ae29)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 258c98e7d4b8f1459772e656cd736c028a13add9)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/smpboot.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d05006f6c31c..8ea3b18cbdc1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -961,8 +961,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
- per_cpu(cpu_current_top_of_stack, cpu) =
- (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
--
2.14.2

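The off-by-padding bug fixed here is easiest to see numerically: the old expression stopped at task_stack_page() + THREAD_SIZE, while task_top_of_stack() subtracts TOP_OF_KERNEL_STACK_PADDING, which is nonzero on x86_32. A sketch with assumed constants and a made-up stack address:

#include <stdio.h>

#define THREAD_SIZE (8 * 1024)		/* assumed x86_32-style stack size */
#define TOP_OF_KERNEL_STACK_PADDING 8	/* nonzero on x86_32, which is the point */

int main(void)
{
	unsigned long stack_page = 0xc1000000UL; /* stand-in for task_stack_page(idle) */

	unsigned long old = stack_page + THREAD_SIZE; /* forgot the padding */
	unsigned long new = stack_page + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;

	printf("old %#lx\nnew %#lx (off by %lu bytes)\n", old, new, old - new);
	return 0;
}
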
@ -1,154 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:16 -0700
Subject: [PATCH] x86/entry/64: Remove thread_struct::sp0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

On x86_64, we can easily calculate sp0 when needed instead of
storing it in thread_struct.

On x86_32, a similar cleanup would be possible, but it would require
cleaning up the vm86 code first, and that can wait for a later
cleanup series.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/719cd9c66c548c4350d98a90f050aee8b17f8919.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit d375cf1530595e33961a8844192cddab913650e3)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 4910af19c69a87e9432467f4d7cb78da5fbcc30a)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/compat.h | 1 +
arch/x86/include/asm/processor.h | 28 +++++++++-------------------
arch/x86/include/asm/switch_to.h | 6 ++++++
arch/x86/kernel/process_64.c | 1 -
4 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 5343c19814b3..948b6d8ec46f 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -6,6 +6,7 @@
*/
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <asm/processor.h>
#include <asm/user32.h>
#include <asm/unistd.h>
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f83fbf1b6dd9..cec9a329c0f1 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,7 +423,9 @@ typedef struct {
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+#ifdef CONFIG_X86_32
unsigned long sp0;
+#endif
unsigned long sp;
#ifdef CONFIG_X86_32
unsigned long sysenter_cs;
@@ -790,6 +792,13 @@ static inline void spin_lock_prefetch(const void *x)

#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))

+#define task_pt_regs(task) \
+({ \
+ unsigned long __ptr = (unsigned long)task_stack_page(task); \
+ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
+ ((struct pt_regs *)__ptr) - 1; \
+})
+
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
@@ -807,23 +816,6 @@ static inline void spin_lock_prefetch(const void *x)
.addr_limit = KERNEL_DS, \
}

-/*
- * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
- * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessible even if the CPU haven't stored the SS/ESP registers
- * on the stack (interrupt gate does not save these registers
- * when switching to the same priv ring).
- * Therefore beware: accessing the ss/esp fields of the
- * "struct pt_regs" is possible, but they may contain the
- * completely wrong values.
- */
-#define task_pt_regs(task) \
-({ \
- unsigned long __ptr = (unsigned long)task_stack_page(task); \
- __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
- ((struct pt_regs *)__ptr) - 1; \
-})
-
#define KSTK_ESP(task) (task_pt_regs(task)->sp)

#else
@@ -853,11 +845,9 @@ static inline void spin_lock_prefetch(const void *x)
#define STACK_TOP_MAX TASK_SIZE_MAX

#define INIT_THREAD { \
- .sp0 = TOP_OF_INIT_STACK, \
.addr_limit = KERNEL_DS, \
}

-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task);

#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 54e64d909725..010cd6e4eafc 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_SWITCH_TO_H
#define _ASM_X86_SWITCH_TO_H

+#include <linux/sched/task_stack.h>
+
struct task_struct; /* one of the stranger aspects of C forward declarations */

struct task_struct *__switch_to_asm(struct task_struct *prev,
@@ -87,7 +89,11 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
/* This is used when switching tasks or entering/exiting vm86 mode. */
static inline void update_sp0(struct task_struct *task)
{
+#ifdef CONFIG_X86_32
load_sp0(task->thread.sp0);
+#else
+ load_sp0(task_top_of_stack(task));
+#endif
}

#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8a748e17bf6e..b08b9b6c40eb 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -275,7 +275,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
struct inactive_task_frame *frame;
struct task_struct *me = current;

- p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
--
2.14.2

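After this patch, update_sp0() introduced two patches earlier is the one place that knows where sp0 comes from: still stored in thread_struct on x86_32, derived from the stack geometry on x86_64. A compilable model of that split; CONFIG_X86_32 here is only a demo switch and the address is made up:

#include <stdio.h>

/* define CONFIG_X86_32 at compile time to exercise the 32-bit path */
struct task {
	unsigned long thread_sp0;	/* only meaningful on the 32-bit path */
	unsigned long stack_top;	/* what task_top_of_stack() would return */
};

static void load_sp0(unsigned long sp0)
{
	printf("load_sp0(%#lx)\n", sp0);
}

static void update_sp0(const struct task *task)
{
#ifdef CONFIG_X86_32
	load_sp0(task->thread_sp0);	/* 32-bit: sp0 is still stored */
#else
	load_sp0(task->stack_top);	/* 64-bit: sp0 is derived on the fly */
#endif
}

int main(void)
{
	struct task t = { .thread_sp0 = 0, .stack_top = 0xffffc000UL };
	update_sp0(&t);
	return 0;
}
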
@ -1,118 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:17 -0700
Subject: [PATCH] x86/traps: Use a new on_thread_stack() helper to clean up an
assertion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Let's keep the stack-related logic together rather than open-coding
a comparison in an assertion in the traps code.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/856b15bee1f55017b8f79d3758b0d51c48a08cf8.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 3383642c2f9d4f5b4fa37436db4a109a1a10018c)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 243de7bd3434c50fb07dd0fc84c462236cfcba3e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/processor.h | 8 ++++++++
arch/x86/include/asm/thread_info.h | 22 +++++++++++-----------
arch/x86/kernel/traps.c | 3 +--
3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cec9a329c0f1..79739e5f939a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -159,6 +159,8 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;

+#include <linux/thread_info.h>
+
extern struct tss_struct doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS];
extern __u32 cpu_caps_set[NCAPINTS];
@@ -534,6 +536,12 @@ static inline unsigned long current_top_of_stack(void)
#endif
}

+static inline bool on_thread_stack(void)
+{
+ return (unsigned long)(current_top_of_stack() -
+ current_stack_pointer()) < THREAD_SIZE;
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e00e1bd6e7b3..ec8ef3bbb7dc 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -48,6 +48,17 @@
* - this struct shares the supervisor stack pages
*/
#ifndef __ASSEMBLY__
+static inline unsigned long current_stack_pointer(void)
+{
+ unsigned long sp;
+#ifdef CONFIG_X86_64
+ asm("mov %%rsp,%0" : "=g" (sp));
+#else
+ asm("mov %%esp,%0" : "=g" (sp));
+#endif
+ return sp;
+}
+
struct task_struct;
#include <asm/cpufeature.h>
#include <linux/atomic.h>
@@ -155,17 +166,6 @@ struct thread_info {
*/
#ifndef __ASSEMBLY__

-static inline unsigned long current_stack_pointer(void)
-{
- unsigned long sp;
-#ifdef CONFIG_X86_64
- asm("mov %%rsp,%0" : "=g" (sp));
-#else
- asm("mov %%esp,%0" : "=g" (sp));
-#endif
- return sp;
-}
-
/*
* Walks up the stack frames to make sure that the specified object is
* entirely contained by a single stack frame.
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b2157d4a5338..3a46cab2696e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -153,8 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
* will catch asm bugs and any attempt to use ist_preempt_enable
* from double_fault.
*/
- BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer()) >= THREAD_SIZE);
+ BUG_ON(!on_thread_stack());

preempt_enable_no_resched();
}
--
2.14.2

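on_thread_stack() relies on unsigned subtraction: if the stack pointer sits above the recorded top, top - sp wraps around to a huge value, so a single "< THREAD_SIZE" comparison rejects both out-of-range directions. A userspace sketch of the test with made-up addresses:

#include <stdio.h>

#define THREAD_SIZE (16 * 1024)

/* top - sp in unsigned arithmetic: an sp above top wraps to a huge value,
 * so one comparison covers both bounds */
static int on_thread_stack(unsigned long top, unsigned long sp)
{
	return (top - sp) < THREAD_SIZE;
}

int main(void)
{
	unsigned long top = 0xffffc000UL;	/* made-up current_top_of_stack() */

	printf("%d\n", on_thread_stack(top, top - 0x100));		/* 1: on stack */
	printf("%d\n", on_thread_stack(top, top - THREAD_SIZE - 8));	/* 0: below */
	printf("%d\n", on_thread_stack(top, top + 8));			/* 0: above */
	return 0;
}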