add KPTI and related patches

picked from Ubuntu-4.13.0-23.26

parent 19894df472
commit 321d628a98
@@ -1,7 +1,7 @@
 From 8e8e48c6f1ec020ff47f50aa49acab6c850cc70e Mon Sep 17 00:00:00 2001
 From: Ben Hutchings <ben@decadent.org.uk>
 Date: Tue, 12 May 2015 19:29:22 +0100
-Subject: [PATCH 01/14] Make mkcompile_h accept an alternate timestamp string
+Subject: [PATCH 001/231] Make mkcompile_h accept an alternate timestamp string
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From d9166325bf8b4d5a4c7aeb6a15c30c90ffc28347 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
 Date: Thu, 14 Sep 2017 11:02:18 +0200
-Subject: [PATCH 02/14] bridge: keep MAC of first assigned port
+Subject: [PATCH 002/231] bridge: keep MAC of first assigned port
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From 469fd3d2d05583a20c8210354cf0ad6cbd2360f7 Mon Sep 17 00:00:00 2001
 From: Mark Weiman <mark.weiman@markzz.com>
 Date: Sat, 29 Jul 2017 09:15:32 -0400
-Subject: [PATCH 03/14] pci: Enable overrides for missing ACS capabilities
+Subject: [PATCH 003/231] pci: Enable overrides for missing ACS capabilities
  (4.12+)
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8

@@ -1,7 +1,7 @@
 From 6003e55f5d4762a819d6691de92d75d29b6c0d58 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
 Date: Thu, 14 Sep 2017 11:09:58 +0200
-Subject: [PATCH 04/14] kvm: disable default dynamic halt polling growth
+Subject: [PATCH 004/231] kvm: disable default dynamic halt polling growth
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From f33ce8d7dcb3053b513003fb775d6457d30d4921 Mon Sep 17 00:00:00 2001
 From: Waiman Long <longman@redhat.com>
 Date: Thu, 17 Aug 2017 15:33:09 -0400
-Subject: [PATCH 05/14] cgroup: Add mount flag to enable cpuset to use v2
+Subject: [PATCH 005/231] cgroup: Add mount flag to enable cpuset to use v2
  behavior in v1 cgroup
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8

@@ -1,7 +1,7 @@
 From 98df2e6815f8bfb7fb07458a067ddc96e7fe917d Mon Sep 17 00:00:00 2001
 From: Waiman Long <longman@redhat.com>
 Date: Thu, 17 Aug 2017 15:33:10 -0400
-Subject: [PATCH 06/14] cpuset: Allow v2 behavior in v1 cgroup
+Subject: [PATCH 006/231] cpuset: Allow v2 behavior in v1 cgroup
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From b6f813dded8f92cf6df31e1bcad4600b11dd4ae3 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
 Date: Thu, 30 Nov 2017 19:05:45 +0100
-Subject: [PATCH 07/14] KVM: x86: fix APIC page invalidation
+Subject: [PATCH 007/231] KVM: x86: fix APIC page invalidation
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From 8ddb7f99e8c2ad80dbe3f9de01e8af5c310ae52d Mon Sep 17 00:00:00 2001
 From: Wei Xu <wexu@redhat.com>
 Date: Fri, 1 Dec 2017 05:10:36 -0500
-Subject: [PATCH 08/14] vhost: fix skb leak in handle_rx()
+Subject: [PATCH 008/231] vhost: fix skb leak in handle_rx()
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From 442f5963a52060fcf86a73377c31a863738632dd Mon Sep 17 00:00:00 2001
 From: Wei Xu <wexu@redhat.com>
 Date: Fri, 1 Dec 2017 05:10:37 -0500
-Subject: [PATCH 09/14] tun: free skb in early errors
+Subject: [PATCH 009/231] tun: free skb in early errors
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From 3fe5d7c8bcba7d240e74c119c2c4ad1c696f205c Mon Sep 17 00:00:00 2001
 From: Wei Xu <wexu@redhat.com>
 Date: Fri, 1 Dec 2017 05:10:38 -0500
-Subject: [PATCH 10/14] tap: free skb if flags error
+Subject: [PATCH 010/231] tap: free skb if flags error
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From 406a5590ca8c58f0f92927230285a3388e4527e4 Mon Sep 17 00:00:00 2001
 From: Parav Pandit <parav@mellanox.com>
 Date: Fri, 5 Jan 2018 23:51:12 +0100
-Subject: [PATCH 11/14] IB/core: Avoid crash on pkey enforcement failed in
+Subject: [PATCH 011/231] IB/core: Avoid crash on pkey enforcement failed in
  received MADs
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8

@@ -1,7 +1,7 @@
 From 72083c18eb8824dd1d0580c1382d23f4fbc4ed33 Mon Sep 17 00:00:00 2001
 From: Daniel Jurgens <danielj@mellanox.com>
 Date: Mon, 20 Nov 2017 16:47:45 -0600
-Subject: [PATCH 12/14] IB/core: Don't enforce PKey security on SMI MADs
+Subject: [PATCH 012/231] IB/core: Don't enforce PKey security on SMI MADs
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

@@ -1,7 +1,7 @@
 From 0140f5df6cd9e326f3009a16c1b66139b9bb3b45 Mon Sep 17 00:00:00 2001
 From: Paolo Bonzini <pbonzini@redhat.com>
 Date: Mon, 6 Nov 2017 13:31:12 +0100
-Subject: [PATCH 13/14] kvm: vmx: Reinstate support for CPUs without virtual
+Subject: [PATCH 013/231] kvm: vmx: Reinstate support for CPUs without virtual
  NMI
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8

@@ -1,7 +1,7 @@
 From a0212ec7cc4bc2f88c4435cca881d21f2b079a80 Mon Sep 17 00:00:00 2001
 From: Paolo Bonzini <pbonzini@redhat.com>
 Date: Thu, 26 Oct 2017 09:13:27 +0200
-Subject: [PATCH 14/14] KVM: SVM: obey guest PAT
+Subject: [PATCH 014/231] KVM: SVM: obey guest PAT
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -0,0 +1,83 @@
From 95e4ae0f4ad738ff6ec8e44ab9fa5529d4369655 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:20 -0700
Subject: [PATCH 015/231] x86/mm: Add the 'nopcid' boot option to turn off PCID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The parameter is only present on x86_64 systems to save a few bytes,
as PCID is always disabled on x86_32.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 0790c9aad84901ca1bdc14746175549c8b5da215)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 62d3a63645c17611fe8ccc0c5adc5e840d9cff7b)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  2 ++
 arch/x86/kernel/cpu/common.c                    | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 73fd6abac39b..3510e255ef4c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2700,6 +2700,8 @@
 	nopat		[X86] Disable PAT (page attribute table extension of
 			pagetables) support.
 
+	nopcid		[X86-64] Disable the PCID cpu feature.
+
 	norandmaps	Don't use address space randomization. Equivalent to
 			echo 0 > /proc/sys/kernel/randomize_va_space
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c8b39870f33e..904485e7b230 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -168,6 +168,24 @@ static int __init x86_mpx_setup(char *s)
 }
 __setup("nompx", x86_mpx_setup);
 
+#ifdef CONFIG_X86_64
+static int __init x86_pcid_setup(char *s)
+{
+	/* require an exact match without trailing characters */
+	if (strlen(s))
+		return 0;
+
+	/* do not emit a message if the feature is not present */
+	if (!boot_cpu_has(X86_FEATURE_PCID))
+		return 1;
+
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+	pr_info("nopcid: PCID feature disabled\n");
+	return 1;
+}
+__setup("nopcid", x86_pcid_setup);
+#endif
+
 static int __init x86_noinvpcid_setup(char *s)
 {
 	/* noinvpcid doesn't accept parameters */
--
2.14.2
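A note on the __setup() convention the patch above relies on: the kernel hands the handler whatever text follows the matched parameter name, and a zero return rejects the match, so "nopcidfoo" is not treated as "nopcid". Below is a minimal user-space model of that exact-match rule; the names (nopcid_handler) are invented for illustration, and this is a sketch of the semantics, not how kernel early-param parsing is actually implemented:

#include <stdio.h>
#include <string.h>

/* Stand-in for x86_pcid_setup(): "rest" is the text after "nopcid". */
static int nopcid_handler(const char *rest)
{
	if (strlen(rest))       /* trailing characters: not an exact match */
		return 0;       /* 0 = reject, as in the patch above */
	printf("nopcid: PCID feature disabled\n");
	return 1;               /* 1 = parameter consumed */
}

int main(void)
{
	const char *args[] = { "nopcid", "nopcidfoo", "nopcid=1" };

	for (int i = 0; i < 3; i++) {
		if (strncmp(args[i], "nopcid", 6) == 0)
			printf("%-10s -> %s\n", args[i],
			       nopcid_handler(args[i] + 6) ? "consumed" : "rejected");
	}
	return 0;
}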
@@ -0,0 +1,120 @@
From bbdde34293757490c18c57d8bd9f92e567bbdbcd Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:21 -0700
Subject: [PATCH 016/231] x86/mm: Enable CR4.PCIDE on supported systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

We can use PCID if the CPU has PCID and PGE and we're not on Xen.

By itself, this has no effect. A followup patch will start using PCID.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 7d6bbe5528395f18de50bd2532843546c849883d)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/tlbflush.h |  8 ++++++++
 arch/x86/kernel/cpu/common.c    | 22 ++++++++++++++++++++++
 arch/x86/xen/enlighten_pv.c     |  6 ++++++
 3 files changed, 36 insertions(+)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50ea3482e1d1..2b3d68093235 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -207,6 +207,14 @@ static inline void __flush_tlb_all(void)
 		__flush_tlb_global();
 	else
 		__flush_tlb();
+
+	/*
+	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+	 * we'd end up flushing kernel translations for the current ASID but
+	 * we might fail to flush kernel translations for other cached ASIDs.
+	 *
+	 * To avoid this issue, we force PCID off if PGE is off.
+	 */
 }
 
 static inline void __flush_tlb_one(unsigned long addr)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 904485e7b230..b95cd94ca97b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -329,6 +329,25 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	}
 }
 
+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_PCID)) {
+		if (cpu_has(c, X86_FEATURE_PGE)) {
+			cr4_set_bits(X86_CR4_PCIDE);
+		} else {
+			/*
+			 * flush_tlb_all(), as currently implemented, won't
+			 * work if PCID is on but PGE is not. Since that
+			 * combination doesn't exist on real hardware, there's
+			 * no reason to try to fully support it, but it's
+			 * polite to avoid corrupting data if we're on
+			 * an improperly configured VM.
+			 */
+			clear_cpu_cap(c, X86_FEATURE_PCID);
+		}
+	}
+}
+
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -1143,6 +1162,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	setup_smep(c);
 	setup_smap(c);
 
+	/* Set up PCID */
+	setup_pcid(c);
+
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4ddb3f37..290bc5ac9852 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -264,6 +264,12 @@ static void __init xen_init_capabilities(void)
 	setup_clear_cpu_cap(X86_FEATURE_ACC);
 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
 
+	/*
+	 * Xen PV would need some work to support PCID: CR3 handling as well
+	 * as xen_flush_tlb_others() would need updating.
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+
 	if (!xen_initial_domain())
 		setup_clear_cpu_cap(X86_FEATURE_ACPI);
 
--
2.14.2
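The gating decision in setup_pcid() above -- use PCID only when PGE is also present -- can be checked from user space, since both bits come from CPUID leaf 1: PCID is ECX bit 17 and PGE is EDX bit 13. A small sketch using the compiler's <cpuid.h> helper; note the kernel itself consults its own cpufeature tables rather than issuing raw CPUID like this:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	int pcid = !!(ecx & (1u << 17));  /* CPUID.01H:ECX.PCID */
	int pge  = !!(edx & (1u << 13));  /* CPUID.01H:EDX.PGE  */

	if (pcid && pge)
		printf("PCID and PGE present: the kernel may set CR4.PCIDE\n");
	else if (pcid)
		printf("PCID without PGE: setup_pcid() clears PCID (odd VM config)\n");
	else
		printf("no PCID on this CPU\n");
	return 0;
}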
@@ -0,0 +1,54 @@
From 20e07f035810f1b2bb3d816e49f48f6b6a37bf64 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 6 Sep 2017 19:54:54 -0700
Subject: [PATCH 017/231] x86/mm: Document how CR4.PCIDE restore works
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

While debugging a problem, I thought that using
cr4_set_bits_and_update_boot() to restore CR4.PCIDE would be
helpful. It turns out to be counterproductive.

Add a comment documenting how this works.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 1c9fe4409ce3e9c78b1ed96ee8ed699d4f03bf33)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0d69e4c4a2db42a9bac6609a3df15bd91163f8b9)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/kernel/cpu/common.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b95cd94ca97b..0b80ed14ff52 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -333,6 +333,19 @@ static void setup_pcid(struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_PCID)) {
 		if (cpu_has(c, X86_FEATURE_PGE)) {
+			/*
+			 * We'd like to use cr4_set_bits_and_update_boot(),
+			 * but we can't. CR4.PCIDE is special and can only
+			 * be set in long mode, and the early CPU init code
+			 * doesn't know this and would try to restore CR4.PCIDE
+			 * prior to entering long mode.
+			 *
+			 * Instead, we rely on the fact that hotplug, resume,
+			 * etc all fully restore CR4 before they write anything
+			 * that could have nonzero PCID bits to CR3. CR4.PCIDE
+			 * has no effect on the page tables themselves, so we
+			 * don't need it to be restored early.
+			 */
 			cr4_set_bits(X86_CR4_PCIDE);
 		} else {
 			/*
--
2.14.2
@@ -0,0 +1,202 @@
From 2a767692d6140051e569ab59a1440b3760839e03 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 11 Jul 2017 10:33:38 -0500
Subject: [PATCH 018/231] x86/entry/64: Refactor IRQ stacks and make them
 NMI-safe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.

The new macros won't work correctly if they're invoked with IRQs on.
Add a check under CONFIG_DEBUG_ENTRY to detect that.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
[ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool
  and ORC unwinder's lives a little easier. ]
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 1d3e53e8624a3ec85f4041ca6d973da7c1575938)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit be58b042e135d0ee777a54798f33015857d7e2e0)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/kernel/process_64.c |  3 ++
 arch/x86/Kconfig.debug       |  2 --
 arch/x86/entry/entry_64.S    | 85 +++++++++++++++++++++++++++++++-------------
 3 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index fe56e6f93cbb..1e7701c4cd80 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -404,6 +404,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+		     this_cpu_read(irq_count) != -1);
+
 	switch_fpu_prepare(prev_fpu, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index cd20ca0b4043..1fc519f3c49e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
 	  Some of these sanity checks may slow down kernel entries and
 	  exits or otherwise impact performance.
 
-	  This is currently used to help test NMI code.
-
 	  If unsure, say N.
 
 config DEBUG_NMI_SELFTEST
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6d078b89a5e8..07b4056af8a8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -447,6 +447,59 @@ ENTRY(irq_entries_start)
 .endr
 END(irq_entries_start)
 
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+	pushfq
+	testl	$X86_EFLAGS_IF, (%rsp)
+	jz	.Lokay_\@
+	ud2
+.Lokay_\@:
+	addq	$8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	movq	%rsp, \old_rsp
+	incl	PER_CPU_VAR(irq_count)
+
+	/*
+	 * Right now, if we just incremented irq_count to zero, we've
+	 * claimed the IRQ stack but we haven't switched to it yet.
+	 *
+	 * If anything is added that can interrupt us here without using IST,
+	 * it must be *extremely* careful to limit its stack usage. This
+	 * could include kprobes and a hypothetical future IST-less #DB
+	 * handler.
+	 */
+
+	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+	pushq	\old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	/* We need to be off the IRQ stack before decrementing irq_count. */
+	popq	%rsp
+
+	/*
+	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+	 * the irq stack but we're not on it.
+	 */
+
+	decl	PER_CPU_VAR(irq_count)
+.endm
+
 /*
  * Interrupt entry/exit.
  *
@@ -485,17 +538,7 @@ END(irq_entries_start)
 	CALL_enter_from_user_mode
 
 1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq	%rsp, %rdi
-	incl	PER_CPU_VAR(irq_count)
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rdi
+	ENTER_IRQ_STACK old_rsp=%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -515,10 +558,8 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	decl	PER_CPU_VAR(irq_count)
 
-	/* Restore saved previous stack */
-	popq	%rsp
+	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
@@ -892,12 +933,10 @@ bad_gs:
 ENTRY(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
-	incl	PER_CPU_VAR(irq_count)
-	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
-	push	%rbp	/* frame pointer backlink */
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	__do_softirq
+	LEAVE_IRQ_STACK
 	leaveq
-	decl	PER_CPU_VAR(irq_count)
 	ret
 END(do_softirq_own_stack)
 
@@ -924,13 +963,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
 	 * see the correct pointer to the pt_regs
 	 */
 	movq	%rdi, %rsp	/* we don't return, adjust the stack frame */
-11:	incl	PER_CPU_VAR(irq_count)
-	movq	%rsp, %rbp
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rbp	/* frame pointer backlink */
+
+	ENTER_IRQ_STACK old_rsp=%r10
 	call	xen_evtchn_do_upcall
-	popq	%rsp
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
+
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
 #endif
--
2.14.2
@@ -0,0 +1,94 @@
From 63463bcffe420067411ad3d4d01b79c872fffc3a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 11 Jul 2017 10:33:39 -0500
Subject: [PATCH 019/231] x86/entry/64: Initialize the top of the IRQ stack
 before switching stacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The OOPS unwinder wants the word at the top of the IRQ stack to
point back to the previous stack at all times when the IRQ stack
is in use. There's currently a one-instruction window in ENTER_IRQ_STACK
during which this isn't the case. Fix it by writing the old RSP to the
top of the IRQ stack before jumping.

This currently writes the pointer to the stack twice, which is a bit
ugly. We could get rid of this by replacing irq_stack_ptr with
irq_stack_ptr_minus_eight (better name welcome). OTOH, there may be
all kinds of odd microarchitectural considerations in play that
affect performance by a few cycles here.

Reported-by: Mike Galbraith <efault@gmx.de>
Reported-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/aae7e79e49914808440ad5310ace138ced2179ca.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 2995590964da93e1fd9a91550f9c9d9fab28f160)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a753ff654dfd07a7f8d6f39a27126589eac7e55f)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/entry/entry_64.S | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 07b4056af8a8..184b70712545 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -469,6 +469,7 @@ END(irq_entries_start)
 	DEBUG_ENTRY_ASSERT_IRQS_OFF
 	movq	%rsp, \old_rsp
 	incl	PER_CPU_VAR(irq_count)
+	jnz	.Lirq_stack_push_old_rsp_\@
 
 	/*
 	 * Right now, if we just incremented irq_count to zero, we've
@@ -478,9 +479,30 @@ END(irq_entries_start)
 	 * it must be *extremely* careful to limit its stack usage. This
 	 * could include kprobes and a hypothetical future IST-less #DB
 	 * handler.
+	 *
+	 * The OOPS unwinder relies on the word at the top of the IRQ
+	 * stack linking back to the previous RSP for the entire time we're
+	 * on the IRQ stack. For this to work reliably, we need to write
+	 * it before we actually move ourselves to the IRQ stack.
+	 */
+
+	movq	\old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
+	movq	PER_CPU_VAR(irq_stack_ptr), %rsp
+
+#ifdef CONFIG_DEBUG_ENTRY
+	/*
+	 * If the first movq above becomes wrong due to IRQ stack layout
+	 * changes, the only way we'll notice is if we try to unwind right
+	 * here. Assert that we set up the stack right to catch this type
+	 * of bug quickly.
 	 */
+	cmpq	-8(%rsp), \old_rsp
+	je	.Lirq_stack_okay\@
+	ud2
+	.Lirq_stack_okay\@:
+#endif
 
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+.Lirq_stack_push_old_rsp_\@:
 	pushq	\old_rsp
 .endm
 
--
2.14.2
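The fix above is fundamentally an ordering rule: the unwinder may sample the stack at any instruction boundary, so the back-link in the IRQ stack's top slot has to be written before RSP moves onto that stack. A small single-threaded C analogy of that "initialize, then publish" pattern, with invented names; the cmpq/ud2 pair in the patch plays the role of the assert here:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define IRQ_STACK_WORDS 512

static uintptr_t irq_stack[IRQ_STACK_WORDS];
static uintptr_t *rsp;                 /* modelled stack pointer */

static void enter_irq_stack(uintptr_t *old_rsp)
{
	/* Write the back-link first, while still on the old stack... */
	irq_stack[IRQ_STACK_WORDS - 1] = (uintptr_t)old_rsp;

	/* ...then switch; an unwinder sampling here already sees it. */
	rsp = &irq_stack[IRQ_STACK_WORDS - 1];

	/* Equivalent of the CONFIG_DEBUG_ENTRY cmpq/ud2 self-check. */
	assert(*rsp == (uintptr_t)old_rsp);
}

int main(void)
{
	uintptr_t task_stack[16];

	rsp = &task_stack[15];
	enter_irq_stack(rsp);
	printf("IRQ stack top links back to %p\n", (void *)*rsp);
	return 0;
}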
@@ -0,0 +1,463 @@
From 884fcb9e8befe21a962d95664b1e60377284636a Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 11 Jul 2017 10:33:44 -0500
Subject: [PATCH 020/231] x86/entry/64: Add unwind hint annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add unwind hint annotations to entry_64.S. This will enable the ORC
unwinder to unwind through any location in the entry code including
syscalls, interrupts, and exceptions.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/b9f6d478aadf68ba57c739dcfac34ec0dc021c4c.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 8c1f75587a18ca032da8f6376d1ed882d7095289)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a8448e6971c1e71b22c651131d14f8be76e6d399)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/entry/Makefile   |  1 -
 arch/x86/entry/calling.h  |  5 ++++
 arch/x86/entry/entry_64.S | 71 ++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 9976fcecd17e..af28a8a24366 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -2,7 +2,6 @@
 # Makefile for the x86 low level entry code
 #
 
-OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
 CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 05ed3d393da7..640aafebdc00 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,4 +1,5 @@
 #include <linux/jump_label.h>
+#include <asm/unwind_hints.h>
 
 /*
 
@@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with
 	movq %rdx, 12*8+\offset(%rsp)
 	movq %rsi, 13*8+\offset(%rsp)
 	movq %rdi, 14*8+\offset(%rsp)
+	UNWIND_HINT_REGS offset=\offset extra=0
 .endm
 .macro SAVE_C_REGS offset=0
 	SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
@@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with
 	movq %r12, 3*8+\offset(%rsp)
 	movq %rbp, 4*8+\offset(%rsp)
 	movq %rbx, 5*8+\offset(%rsp)
+	UNWIND_HINT_REGS offset=\offset
 .endm
 
 .macro RESTORE_EXTRA_REGS offset=0
@@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with
 	movq 3*8+\offset(%rsp), %r12
 	movq 4*8+\offset(%rsp), %rbp
 	movq 5*8+\offset(%rsp), %rbx
+	UNWIND_HINT_REGS offset=\offset extra=0
 .endm
 
 .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
@@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with
 	.endif
 	movq 13*8(%rsp), %rsi
 	movq 14*8(%rsp), %rdi
+	UNWIND_HINT_IRET_REGS offset=16*8
 .endm
 .macro RESTORE_C_REGS
 	RESTORE_C_REGS_HELPER 1,1,1,1,1
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 184b70712545..64b233ab7cad 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 #include <linux/err.h>
 
 .code64
@@ -43,9 +44,10 @@
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_usergs_sysret64)
+	UNWIND_HINT_EMPTY
 	swapgs
 	sysretq
-ENDPROC(native_usergs_sysret64)
+END(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
 .macro TRACE_IRQS_IRETQ
@@ -134,6 +136,7 @@ ENDPROC(native_usergs_sysret64)
  */
 
 ENTRY(entry_SYSCALL_64)
+	UNWIND_HINT_EMPTY
 	/*
 	 * Interrupts are off on entry.
 	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
@@ -169,6 +172,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r10		/* pt_regs->r10 */
 	pushq	%r11		/* pt_regs->r11 */
 	sub	$(6*8), %rsp	/* pt_regs->bp, bx, r12-15 not saved */
+	UNWIND_HINT_REGS extra=0
 
 	/*
 	 * If we need to do entry work or if we guess we'll need to do
@@ -223,6 +227,7 @@ entry_SYSCALL_64_fastpath:
 	movq	EFLAGS(%rsp), %r11
 	RESTORE_C_REGS_EXCEPT_RCX_R11
 	movq	RSP(%rsp), %rsp
+	UNWIND_HINT_EMPTY
 	USERGS_SYSRET64
 
 1:
@@ -316,6 +321,7 @@ syscall_return_via_sysret:
 	/* rcx and r11 are already restored (see code above) */
 	RESTORE_C_REGS_EXCEPT_RCX_R11
 	movq	RSP(%rsp), %rsp
+	UNWIND_HINT_EMPTY
 	USERGS_SYSRET64
 
 opportunistic_sysret_failed:
@@ -343,6 +349,7 @@ ENTRY(stub_ptregs_64)
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	popq	%rax
+	UNWIND_HINT_REGS extra=0
 	jmp	entry_SYSCALL64_slow_path
 
 1:
@@ -351,6 +358,7 @@ END(stub_ptregs_64)
 
 .macro ptregs_stub func
 ENTRY(ptregs_\func)
+	UNWIND_HINT_FUNC
 	leaq	\func(%rip), %rax
 	jmp	stub_ptregs_64
 END(ptregs_\func)
@@ -367,6 +375,7 @@ END(ptregs_\func)
  * %rsi: next task
  */
 ENTRY(__switch_to_asm)
+	UNWIND_HINT_FUNC
 	/*
 	 * Save callee-saved registers
 	 * This must match the order in inactive_task_frame
@@ -406,6 +415,7 @@ END(__switch_to_asm)
  * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
+	UNWIND_HINT_EMPTY
 	movq	%rax, %rdi
 	call	schedule_tail	/* rdi: 'prev' task parameter */
 
@@ -413,6 +423,7 @@ ENTRY(ret_from_fork)
 	jnz	1f		/* kernel threads are uncommon */
 
 2:
+	UNWIND_HINT_REGS
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON		/* user mode is traced as IRQS on */
@@ -440,10 +451,11 @@ END(ret_from_fork)
 ENTRY(irq_entries_start)
     vector=FIRST_EXTERNAL_VECTOR
     .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	UNWIND_HINT_IRET_REGS
 	pushq	$(~vector+0x80)	/* Note: always in signed byte range */
-    vector=vector+1
 	jmp	common_interrupt
 	.align	8
+	vector=vector+1
 .endr
 END(irq_entries_start)
 
@@ -465,9 +477,14 @@ END(irq_entries_start)
  *
  * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
  */
-.macro ENTER_IRQ_STACK old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp
 	DEBUG_ENTRY_ASSERT_IRQS_OFF
 	movq	%rsp, \old_rsp
+
+	.if \regs
+	UNWIND_HINT_REGS base=\old_rsp
+	.endif
+
 	incl	PER_CPU_VAR(irq_count)
 	jnz	.Lirq_stack_push_old_rsp_\@
 
@@ -504,16 +521,24 @@ END(irq_entries_start)
 
 .Lirq_stack_push_old_rsp_\@:
 	pushq	\old_rsp
+
+	.if \regs
+	UNWIND_HINT_REGS indirect=1
+	.endif
 .endm
 
 /*
  * Undoes ENTER_IRQ_STACK.
  */
-.macro LEAVE_IRQ_STACK
+.macro LEAVE_IRQ_STACK regs=1
 	DEBUG_ENTRY_ASSERT_IRQS_OFF
 	/* We need to be off the IRQ stack before decrementing irq_count. */
 	popq	%rsp
 
+	.if \regs
+	UNWIND_HINT_REGS
+	.endif
+
 	/*
 	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
 	 * the irq stack but we're not on it.
@@ -624,6 +649,7 @@ restore_c_regs_and_iret:
 	INTERRUPT_RETURN
 
 ENTRY(native_iret)
+	UNWIND_HINT_IRET_REGS
 	/*
 	 * Are we returning to a stack segment from the LDT? Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
@@ -696,6 +722,7 @@ native_irq_return_ldt:
 	orq	PER_CPU_VAR(espfix_stack), %rax
 	SWAPGS
 	movq	%rax, %rsp
+	UNWIND_HINT_IRET_REGS offset=8
 
 	/*
 	 * At this point, we cannot write to the stack any more, but we can
@@ -717,6 +744,7 @@ END(common_interrupt)
  */
 .macro apicinterrupt3 num sym do_sym
 ENTRY(\sym)
+	UNWIND_HINT_IRET_REGS
 	ASM_CLAC
 	pushq	$~(\num)
 .Lcommon_\sym:
@@ -803,6 +831,8 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
+	UNWIND_HINT_IRET_REGS offset=8
+
 	/* Sanity check */
 	.if \shift_ist != -1 && \paranoid == 0
 	.error "using shift_ist requires paranoid=1"
@@ -826,6 +856,7 @@ ENTRY(\sym)
 	.else
 	call	error_entry
 	.endif
+	UNWIND_HINT_REGS
 	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
 
 	.if \paranoid
@@ -923,6 +954,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
  * edi:  new selector
  */
 ENTRY(native_load_gs_index)
+	FRAME_BEGIN
 	pushfq
 	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
 	SWAPGS
@@ -931,8 +963,9 @@ ENTRY(native_load_gs_index)
 2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
 	SWAPGS
 	popfq
+	FRAME_END
 	ret
-END(native_load_gs_index)
+ENDPROC(native_load_gs_index)
 EXPORT_SYMBOL(native_load_gs_index)
 
 _ASM_EXTABLE(.Lgs_change, bad_gs)
@@ -955,12 +988,12 @@ bad_gs:
 ENTRY(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
-	ENTER_IRQ_STACK old_rsp=%r11
+	ENTER_IRQ_STACK regs=0 old_rsp=%r11
 	call	__do_softirq
-	LEAVE_IRQ_STACK
+	LEAVE_IRQ_STACK regs=0
 	leaveq
 	ret
-END(do_softirq_own_stack)
+ENDPROC(do_softirq_own_stack)
 
 #ifdef CONFIG_XEN
 idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
@@ -984,7 +1017,9 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
 	 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
 	 * see the correct pointer to the pt_regs
 	 */
+	UNWIND_HINT_FUNC
 	movq	%rdi, %rsp	/* we don't return, adjust the stack frame */
+	UNWIND_HINT_REGS
 
 	ENTER_IRQ_STACK old_rsp=%r10
 	call	xen_evtchn_do_upcall
@@ -1010,6 +1045,7 @@ END(xen_do_hypervisor_callback)
  * with its current contents: any discrepancy means we in category 1.
  */
 ENTRY(xen_failsafe_callback)
+	UNWIND_HINT_EMPTY
 	movl	%ds, %ecx
 	cmpw	%cx, 0x10(%rsp)
 	jne	1f
@@ -1029,11 +1065,13 @@ ENTRY(xen_failsafe_callback)
 	pushq	$0	/* RIP */
 	pushq	%r11
 	pushq	%rcx
+	UNWIND_HINT_IRET_REGS offset=8
 	jmp	general_protection
 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
 	movq	(%rsp), %rcx
 	movq	8(%rsp), %r11
 	addq	$0x30, %rsp
+	UNWIND_HINT_IRET_REGS
 	pushq	$-1 /* orig_ax = -1 => not a system call */
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
@@ -1079,6 +1117,7 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
+	UNWIND_HINT_FUNC
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
@@ -1106,6 +1145,7 @@ END(paranoid_entry)
 * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
 */
 ENTRY(paranoid_exit)
+	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF_DEBUG
 	testl	%ebx, %ebx	/* swapgs needed? */
@@ -1127,6 +1167,7 @@ END(paranoid_exit)
 * Return: EBX=0: came from user mode; EBX=1: otherwise
 */
 ENTRY(error_entry)
+	UNWIND_HINT_FUNC
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
@@ -1211,6 +1252,7 @@ END(error_entry)
 *   0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
 */
 ENTRY(error_exit)
+	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	testl	%ebx, %ebx
@@ -1220,6 +1262,7 @@ END(error_exit)
 
 /* Runs on exception stack */
 ENTRY(nmi)
+	UNWIND_HINT_IRET_REGS
 	/*
 	 * Fix up the exception frame if we're on Xen.
 	 * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
@@ -1293,11 +1336,13 @@ ENTRY(nmi)
 	cld
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	UNWIND_HINT_IRET_REGS base=%rdx offset=8
 	pushq	5*8(%rdx)	/* pt_regs->ss */
 	pushq	4*8(%rdx)	/* pt_regs->rsp */
 	pushq	3*8(%rdx)	/* pt_regs->flags */
 	pushq	2*8(%rdx)	/* pt_regs->cs */
 	pushq	1*8(%rdx)	/* pt_regs->rip */
+	UNWIND_HINT_IRET_REGS
 	pushq	$-1		/* pt_regs->orig_ax */
 	pushq	%rdi		/* pt_regs->di */
 	pushq	%rsi		/* pt_regs->si */
@@ -1314,6 +1359,7 @@ ENTRY(nmi)
 	pushq	%r13		/* pt_regs->r13 */
 	pushq	%r14		/* pt_regs->r14 */
 	pushq	%r15		/* pt_regs->r15 */
+	UNWIND_HINT_REGS
 	ENCODE_FRAME_POINTER
 
 	/*
@@ -1468,6 +1514,7 @@ first_nmi:
 	.rept 5
 	pushq	11*8(%rsp)
 	.endr
+	UNWIND_HINT_IRET_REGS
 
 	/* Everything up to here is safe from nested NMIs */
 
@@ -1483,6 +1530,7 @@ first_nmi:
 	pushq	$__KERNEL_CS	/* CS */
 	pushq	$1f	/* RIP */
 	INTERRUPT_RETURN	/* continues at repeat_nmi below */
+	UNWIND_HINT_IRET_REGS
 1:
 #endif
 
@@ -1532,6 +1580,7 @@ end_repeat_nmi:
 	 * exceptions might do.
 	 */
 	call	paranoid_entry
+	UNWIND_HINT_REGS
 
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq	%rsp, %rdi
@@ -1569,17 +1618,19 @@ nmi_restore:
 END(nmi)
 
 ENTRY(ignore_sysret)
+	UNWIND_HINT_EMPTY
 	mov	$-ENOSYS, %eax
 	sysret
 END(ignore_sysret)
 
 ENTRY(rewind_stack_do_exit)
+	UNWIND_HINT_FUNC
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl	%ebp, %ebp
 
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rax
-	leaq	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
+	leaq	-PTREGS_SIZE(%rax), %rsp
+	UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
 
 	call	do_exit
-1:	jmp 1b
 END(rewind_stack_do_exit)
--
2.14.2
@@ -0,0 +1,70 @@
From aa2a95a84f2cbd92b10887f3c99c7858fae9e7e4 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 Jul 2017 16:10:29 -0500
Subject: [PATCH 021/231] xen/x86: Remove SME feature in PV guests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Xen does not currently support SME for PV guests. Clear the SME CPU
capability in order to avoid any ambiguity.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: <xen-devel@lists.xen.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/3b605622a9fae5e588e5a13967120a18ec18071b.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit f2f931c6819467af5260a21c59fb787ce2863f92)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8370907399392a637a2e51b4db3368fb594db3a6)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/xen/enlighten_pv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 290bc5ac9852..df1921751aa5 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -263,6 +263,7 @@ static void __init xen_init_capabilities(void)
 	setup_clear_cpu_cap(X86_FEATURE_MTRR);
 	setup_clear_cpu_cap(X86_FEATURE_ACC);
 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	setup_clear_cpu_cap(X86_FEATURE_SME);
 
 	/*
 	 * Xen PV would need some work to support PCID: CR3 handling as well
--
2.14.2
@@ -0,0 +1,152 @@
From c63a9850ba744d9871b4ca2dad11588db5d670a2 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 7 Aug 2017 20:59:21 -0700
Subject: [PATCH 022/231] x86/xen/64: Rearrange the SYSCALL entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Xen's raw SYSCALL entries are much less weird than native. Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.

This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
the SWAPGS_UNSAFE_STACK paravirt hook. The SYSENTER code would
benefit from similar treatment.

This makes one change to the native code path: the compat
instruction that clears the high 32 bits of %rax is moved slightly
later. I'd be surprised if this affects performance at all.

Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 8a9949bc71a71b3dd633255ebe8f8869b1f73474)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b8cec41ee5f30df5032cfe8c86103f7d92a89590)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/entry/entry_64.S        |  9 ++-------
 arch/x86/entry/entry_64_compat.S |  7 +++----
 arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
 3 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 64b233ab7cad..4dbb336a1fdd 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
 	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
 	 * it is too small to ever cause noticeable irq latency.
 	 */
-	SWAPGS_UNSAFE_STACK
-	/*
-	 * A hypervisor implementation might want to use a label
-	 * after the swapgs, so that it can do the swapgs
-	 * for the guest and jump here on syscall.
-	 */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
 
+	swapgs
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r11		/* pt_regs->flags */
 	pushq	$__USER_CS	/* pt_regs->cs */
 	pushq	%rcx		/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
 	pushq	%rax		/* pt_regs->orig_ax */
 	pushq	%rdi		/* pt_regs->di */
 	pushq	%rsi		/* pt_regs->si */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..5314d7b8e5ad 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
 */
 ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	swapgs
 
 	/* Stash user ESP and switch to the kernel stack. */
 	movl	%esp, %r8d
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	/* Zero-extending 32-bit regs, do not remove */
-	movl	%eax, %eax
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS	/* pt_regs->ss */
 	pushq	%r8		/* pt_regs->sp */
 	pushq	%r11		/* pt_regs->flags */
 	pushq	$__USER32_CS	/* pt_regs->cs */
 	pushq	%rcx		/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+	movl	%eax, %eax	/* discard orig_ax high bits */
 	pushq	%rax		/* pt_regs->orig_ax */
 	pushq	%rdi		/* pt_regs->di */
 	pushq	%rsi		/* pt_regs->si */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..a8a4f4c460a6 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
  *	rip
  *	r11
  * rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
  */
 
-.macro undo_xen_syscall
-	mov 0*8(%rsp), %rcx
-	mov 1*8(%rsp), %r11
-	mov 5*8(%rsp), %rsp
-.endm
-
 /* Normal 64-bit system call target */
 ENTRY(xen_syscall_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_64_after_swapgs
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_64_after_hwframe
 ENDPROC(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
 /* 32-bit compat syscall target */
 ENTRY(xen_syscall32_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_compat
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
 ENTRY(xen_sysenter_target)
-	undo_xen_syscall
+	mov 0*8(%rsp), %rcx
+	mov 1*8(%rsp), %r11
+	mov 5*8(%rsp), %rsp
 	jmp entry_SYSENTER_compat
 ENDPROC(xen_sysenter_target)
 
--
2.14.2
@ -0,0 +1,223 @@
From 050fcd1a748bd2f17b540d0147c8a4f3067653ee Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 3 Aug 2017 11:38:21 +0900
Subject: [PATCH 023/231] irq: Make the irqentry text section unconditional
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Generate irqentry and softirqentry text sections without
any Kconfig dependencies. This will add extra sections, but
there should be no performance impact.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David S . Miller <davem@davemloft.net>
Cc: Francis Deslauriers <francis.deslauriers@efficios.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: linux-arch@vger.kernel.org
Cc: linux-cris-kernel@axis.com
Cc: mathieu.desnoyers@efficios.com
Link: http://lkml.kernel.org/r/150172789110.27216.3955739126693102122.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 229a71860547ec856b156179a9c6bef2de426f66)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8fd2f68cc93ae772cfddf4151d13448ff17d0229)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/arm/include/asm/traps.h | 7 -------
arch/arm64/include/asm/traps.h | 7 -------
include/asm-generic/sections.h | 4 ++++
include/asm-generic/vmlinux.lds.h | 8 --------
include/linux/interrupt.h | 14 +-------------
arch/x86/kernel/unwind_frame.c | 2 --
arch/x86/entry/entry_64.S | 9 ++-------
7 files changed, 7 insertions(+), 44 deletions(-)

diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f555bb3664dc..683d9230984a 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -18,7 +18,6 @@ struct undef_hook {
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);

-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
extern char __irqentry_text_start[];
@@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif

static inline int in_exception_text(unsigned long ptr)
{
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 02e9035b0685..47a9066f7c86 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook);

void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);

-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif

static inline int in_exception_text(unsigned long ptr)
{
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 532372c6cf15..e5da44eddd2f 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -27,6 +27,8 @@
* __kprobes_text_start, __kprobes_text_end
* __entry_text_start, __entry_text_end
* __ctors_start, __ctors_end
+ * __irqentry_text_start, __irqentry_text_end
+ * __softirqentry_text_start, __softirqentry_text_end
*/
extern char _text[], _stext[], _etext[];
extern char _data[], _sdata[], _edata[];
@@ -39,6 +41,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __entry_text_start[], __entry_text_end[];
extern char __start_rodata[], __end_rodata[];
+extern char __irqentry_text_start[], __irqentry_text_end[];
+extern char __softirqentry_text_start[], __softirqentry_text_end[];

/* Start and end of .ctors section - used for constructor calls. */
extern char __ctors_start[], __ctors_end[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 9623d78f8494..e7e955d4ab9e 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -497,25 +497,17 @@
*(.entry.text) \
VMLINUX_SYMBOL(__entry_text_end) = .;

-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
#define IRQENTRY_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__irqentry_text_start) = .; \
*(.irqentry.text) \
VMLINUX_SYMBOL(__irqentry_text_end) = .;
-#else
-#define IRQENTRY_TEXT
-#endif

-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
#define SOFTIRQENTRY_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__softirqentry_text_start) = .; \
*(.softirqentry.text) \
VMLINUX_SYMBOL(__softirqentry_text_end) = .;
-#else
-#define SOFTIRQENTRY_TEXT
-#endif

/* Section used for early init (in .S files) */
#define HEAD_TEXT *(.head.text)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a2fddddb0d60..59ba11661b6e 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,7 @@
#include <linux/atomic.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
+#include <asm/sections.h>

/*
* These correspond to the IORESOURCE_IRQ_* defines in
@@ -726,7 +727,6 @@ extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
extern int arch_early_irq_init(void);

-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
/*
* We want to know which function is an entrypoint of a hardirq or a softirq.
*/
@@ -734,16 +734,4 @@ extern int arch_early_irq_init(void);
#define __softirq_entry \
__attribute__((__section__(".softirqentry.text")))

-/* Limits of hardirq entrypoints */
-extern char __irqentry_text_start[];
-extern char __irqentry_text_end[];
-/* Limits of softirq entrypoints */
-extern char __softirqentry_text_start[];
-extern char __softirqentry_text_end[];
-
-#else
-#define __irq_entry
-#define __softirq_entry
-#endif
-
#endif
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index b9389d72b2f7..c29e5bc7e9c9 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -91,10 +91,8 @@ static bool in_entry_code(unsigned long ip)
if (addr >= __entry_text_start && addr < __entry_text_end)
return true;

-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
return true;
-#endif

return false;
}
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4dbb336a1fdd..ca0b250eefc4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -761,13 +761,8 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
#endif

/* Make sure APIC interrupt handlers end up in the irqentry section: */
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
-# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
-# define POP_SECTION_IRQENTRY .popsection
-#else
-# define PUSH_SECTION_IRQENTRY
-# define POP_SECTION_IRQENTRY
-#endif
+#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+#define POP_SECTION_IRQENTRY .popsection

.macro apicinterrupt num sym do_sym
PUSH_SECTION_IRQENTRY
--
2.14.2

@ -0,0 +1,84 @@
From 2b0794bbebac81a539dfd405273d61a8a16531d2 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 14 Aug 2017 22:36:19 -0700
Subject: [PATCH 024/231] x86/xen/64: Fix the reported SS and CS in SYSCALL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

When I cleaned up the Xen SYSCALL entries, I inadvertently changed
the reported segment registers. Before my patch, regs->ss was
__USER(32)_DS and regs->cs was __USER(32)_CS. After the patch, they
are FLAT_USER_CS/DS(32).

This had a couple unfortunate effects. It confused the
opportunistic fast return logic. It also significantly increased
the risk of triggering a nasty glibc bug:

https://sourceware.org/bugzilla/show_bug.cgi?id=21269

Update the Xen entry code to change it back.

Reported-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: xen-devel@lists.xenproject.org
Fixes: 8a9949bc71a7 ("x86/xen/64: Rearrange the SYSCALL entries")
Link: http://lkml.kernel.org/r/daba8351ea2764bb30272296ab9ce08a81bd8264.1502775273.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit fa2016a8e7d846b306e431646d250500e1da0c33)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 69a6ef3aeb274efe86fd74771830354f303ccc2f)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/xen/xen-asm_64.S | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index a8a4f4c460a6..c5fee2680abc 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -88,6 +88,15 @@ RELOC(xen_sysret64, 1b+1)
ENTRY(xen_syscall_target)
popq %rcx
popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER_DS and __USER_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER_DS, 4*8(%rsp)
+ movq $__USER_CS, 1*8(%rsp)
+
jmp entry_SYSCALL_64_after_hwframe
ENDPROC(xen_syscall_target)

@@ -97,6 +106,15 @@ ENDPROC(xen_syscall_target)
ENTRY(xen_syscall32_target)
popq %rcx
popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER32_DS and __USER32_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER32_DS, 4*8(%rsp)
+ movq $__USER32_CS, 1*8(%rsp)
+
jmp entry_SYSCALL_compat_after_hwframe
ENDPROC(xen_syscall32_target)

--
2.14.2

patches/kernel/0025-x86-paravirt-xen-Remove-xen_patch.patch
@ -0,0 +1,360 @@
From e61e24c7ee0d773230646650659c34ffc5316520 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 16 Aug 2017 19:31:56 +0200
Subject: [PATCH 025/231] x86/paravirt/xen: Remove xen_patch()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Xen's paravirt patch function xen_patch() does some special casing for
irq_ops functions to apply relocations when those functions can be
patched inline instead of calls.

Unfortunately none of the special case function replacements is small
enough to be patched inline, so the special case never applies.

As xen_patch() will call paravirt_patch_default() in all cases it can
be just dropped. xen-asm.h doesn't seem necessary without xen_patch()
as the only thing left in it would be the definition of XEN_EFLAGS_NMI
used only once. So move that definition and remove xen-asm.h.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boris.ostrovsky@oracle.com
Cc: lguest@lists.ozlabs.org
Cc: rusty@rustcorp.com.au
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20170816173157.8633-2-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit edcb5cf84f05e5d2e2af25422a72ccde359fcca9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit c96c9c712136a9e24a7aaf0aac4c149eee01bd8e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/xen/xen-asm.h | 12 ---------
arch/x86/xen/xen-ops.h | 15 +++---------
arch/x86/xen/enlighten_pv.c | 59 +--------------------------------------------
arch/x86/xen/xen-asm.S | 26 +++++---------------
arch/x86/xen/xen-asm_32.S | 27 ++++-----------------
arch/x86/xen/xen-asm_64.S | 20 ++++-----------
6 files changed, 21 insertions(+), 138 deletions(-)
delete mode 100644 arch/x86/xen/xen-asm.h

diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h
deleted file mode 100644
index 465276467a47..000000000000
--- a/arch/x86/xen/xen-asm.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _XEN_XEN_ASM_H
-#define _XEN_XEN_ASM_H
-
-#include <linux/linkage.h>
-
-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x) .globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
-
-#endif
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d5004477db6..70301ac0d414 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -129,17 +129,10 @@ static inline void __init xen_efi_init(void)
}
#endif

-/* Declare an asm function, along with symbols needed to make it
- inlineable */
-#define DECL_ASM(ret, name, ...) \
- __visible ret name(__VA_ARGS__); \
- extern char name##_end[] __visible; \
- extern char name##_reloc[] __visible
-
-DECL_ASM(void, xen_irq_enable_direct, void);
-DECL_ASM(void, xen_irq_disable_direct, void);
-DECL_ASM(unsigned long, xen_save_fl_direct, void);
-DECL_ASM(void, xen_restore_fl_direct, unsigned long);
+__visible void xen_irq_enable_direct(void);
+__visible void xen_irq_disable_direct(void);
+__visible unsigned long xen_save_fl_direct(void);
+__visible void xen_restore_fl_direct(unsigned long);

/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index df1921751aa5..6c279c8f0a0e 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -988,59 +988,6 @@ void __ref xen_setup_vcpu_info_placement(void)
}
}

-static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
- unsigned long addr, unsigned len)
-{
- char *start, *end, *reloc;
- unsigned ret;
-
- start = end = reloc = NULL;
-
-#define SITE(op, x) \
- case PARAVIRT_PATCH(op.x): \
- if (xen_have_vcpu_info_placement) { \
- start = (char *)xen_##x##_direct; \
- end = xen_##x##_direct_end; \
- reloc = xen_##x##_direct_reloc; \
- } \
- goto patch_site
-
- switch (type) {
- SITE(pv_irq_ops, irq_enable);
- SITE(pv_irq_ops, irq_disable);
- SITE(pv_irq_ops, save_fl);
- SITE(pv_irq_ops, restore_fl);
-#undef SITE
-
- patch_site:
- if (start == NULL || (end-start) > len)
- goto default_patch;
-
- ret = paravirt_patch_insns(insnbuf, len, start, end);
-
- /* Note: because reloc is assigned from something that
- appears to be an array, gcc assumes it's non-null,
- but doesn't know its relationship with start and
- end. */
- if (reloc > start && reloc < end) {
- int reloc_off = reloc - start;
- long *relocp = (long *)(insnbuf + reloc_off);
- long delta = start - (char *)addr;
-
- *relocp += delta;
- }
- break;
-
- default_patch:
- default:
- ret = paravirt_patch_default(type, clobbers, insnbuf,
- addr, len);
- break;
- }
-
- return ret;
-}
-
static const struct pv_info xen_info __initconst = {
.shared_kernel_pmd = 0,

@@ -1050,10 +997,6 @@ static const struct pv_info xen_info __initconst = {
.name = "Xen",
};

-static const struct pv_init_ops xen_init_ops __initconst = {
- .patch = xen_patch,
-};
-
static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.cpuid = xen_cpuid,

@@ -1251,7 +1194,7 @@ asmlinkage __visible void __init xen_start_kernel(void)

/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_init_ops = xen_init_ops;
+ pv_init_ops.patch = paravirt_patch_default;
pv_cpu_ops = xen_cpu_ops;

x86_platform.get_nmi_reason = xen_get_nmi_reason;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index eff224df813f..dcd31fa39b5d 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in percpu data) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/

#include <asm/asm-offsets.h>
@@ -16,7 +10,7 @@
#include <asm/processor-flags.h>
#include <asm/frame.h>

-#include "xen-asm.h"
+#include <linux/linkage.h>

/*
* Enable events. This clears the event mask and tests the pending
@@ -38,13 +32,11 @@ ENTRY(xen_irq_enable_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jz 1f

-2: call check_events
+ call check_events
1:
-ENDPATCH(xen_irq_enable_direct)
FRAME_END
ret
ENDPROC(xen_irq_enable_direct)
- RELOC(xen_irq_enable_direct, 2b+1)


/*
@@ -53,10 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
*/
ENTRY(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
ret
- ENDPROC(xen_irq_disable_direct)
- RELOC(xen_irq_disable_direct, 0)
+ENDPROC(xen_irq_disable_direct)

/*
* (xen_)save_fl is used to get the current interrupt enable status.
@@ -71,10 +61,8 @@ ENTRY(xen_save_fl_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah
addb %ah, %ah
-ENDPATCH(xen_save_fl_direct)
ret
ENDPROC(xen_save_fl_direct)
- RELOC(xen_save_fl_direct, 0)


/*
@@ -101,13 +89,11 @@ ENTRY(xen_restore_fl_direct)
/* check for unmasked and pending */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jnz 1f
-2: call check_events
+ call check_events
1:
-ENDPATCH(xen_restore_fl_direct)
FRAME_END
ret
ENDPROC(xen_restore_fl_direct)
- RELOC(xen_restore_fl_direct, 2b+1)


/*
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index feb6d40a0860..1200e262a116 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/

#include <asm/thread_info.h>
@@ -18,21 +12,10 @@

#include <xen/interface/xen.h>

-#include "xen-asm.h"
+#include <linux/linkage.h>

-/*
- * Force an event check by making a hypercall, but preserve regs
- * before making the call.
- */
-check_events:
- push %eax
- push %ecx
- push %edx
- call xen_force_evtchn_callback
- pop %edx
- pop %ecx
- pop %eax
- ret
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI 0x80000000

/*
* This is run where a normal iret would be run, with the same stack setup:
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c5fee2680abc..3a3b6a211584 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/

#include <asm/errno.h>
@@ -20,7 +14,7 @@

#include <xen/interface/xen.h>

-#include "xen-asm.h"
+#include <linux/linkage.h>

ENTRY(xen_adjust_exception_frame)
mov 8+0(%rsp), %rcx
@@ -46,9 +40,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
*/
ENTRY(xen_iret)
pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_iret)
-RELOC(xen_iret, 1b+1)
+ jmp hypercall_iret

ENTRY(xen_sysret64)
/*
@@ -65,9 +57,7 @@ ENTRY(xen_sysret64)
pushq %rcx

pushq $VGCF_in_syscall
-1: jmp hypercall_iret
-ENDPATCH(xen_sysret64)
-RELOC(xen_sysret64, 1b+1)
+ jmp hypercall_iret

/*
* Xen handles syscall callbacks much like ordinary exceptions, which
--
2.14.2

@ -0,0 +1,218 @@
From e61177a6feca143d431be190d4758bda23f6174d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Aug 2017 08:47:22 +0200
Subject: [PATCH 026/231] x86/traps: Simplify pagefault tracing logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Make use of the new irqvector tracing static key and remove the duplicated
trace_do_pagefault() implementation.

If irq vector tracing is disabled, then the overhead of this is a single
NOP5, which is a reasonable tradeoff to avoid duplicated code and the
unholy macro mess.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170828064956.672965407@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 11a7ffb01703c3bbb1e9b968893f4487a1b0b5a8)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8478bb5608747fd64c9fd4a2f5422fb4af756a50)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/traps.h | 10 +--------
arch/x86/kernel/kvm.c | 2 +-
arch/x86/mm/fault.c | 49 ++++++++++++--------------------------------
arch/x86/entry/entry_32.S | 8 --------
arch/x86/entry/entry_64.S | 13 +-----------
5 files changed, 16 insertions(+), 66 deletions(-)

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7f48cd..b4f322d6c95f 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -39,7 +39,6 @@ asmlinkage void machine_check(void);
asmlinkage void simd_coprocessor_error(void);

#ifdef CONFIG_TRACING
-asmlinkage void trace_page_fault(void);
#define trace_stack_segment stack_segment
#define trace_divide_error divide_error
#define trace_bounds bounds
@@ -54,6 +53,7 @@ asmlinkage void trace_page_fault(void);
#define trace_alignment_check alignment_check
#define trace_simd_coprocessor_error simd_coprocessor_error
#define trace_async_page_fault async_page_fault
+#define trace_page_fault page_fault
#endif

dotraplinkage void do_divide_error(struct pt_regs *, long);
@@ -74,14 +74,6 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
-#ifdef CONFIG_TRACING
-dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
-#else
-static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
-{
- do_page_fault(regs, error);
-}
-#endif
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
dotraplinkage void do_alignment_check(struct pt_regs *, long);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e5e4306e4546..9e3798b00e40 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -270,7 +270,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)

switch (kvm_read_and_reset_pf_reason()) {
default:
- trace_do_page_fault(regs, error_code);
+ do_page_fault(regs, error_code);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 955be01dd9cc..4ee9eb916826 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1253,10 +1253,6 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
- *
- * This function must have noinline because both callers
- * {,trace_}do_page_fault() have notrace on. Having this an actual function
- * guarantees there's a function trace entry.
*/
static noinline void
__do_page_fault(struct pt_regs *regs, unsigned long error_code,
@@ -1491,27 +1487,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
}
NOKPROBE_SYMBOL(__do_page_fault);

-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
-{
- unsigned long address = read_cr2(); /* Get the faulting address */
- enum ctx_state prev_state;
-
- /*
- * We must have this function tagged with __kprobes, notrace and call
- * read_cr2() before calling anything else. To avoid calling any kind
- * of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contain all sorts of tracepoints.
- */
-
- prev_state = exception_enter();
- __do_page_fault(regs, error_code, address);
- exception_exit(prev_state);
-}
-NOKPROBE_SYMBOL(do_page_fault);
-
-#ifdef CONFIG_TRACING
static nokprobe_inline void
trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
@@ -1522,22 +1497,24 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
trace_page_fault_kernel(address, regs, error_code);
}

+/*
+ * We must have this function blacklisted from kprobes, tagged with notrace
+ * and call read_cr2() before calling anything else. To avoid calling any
+ * kind of tracing machinery before we've observed the CR2 value.
+ *
+ * exception_{enter,exit}() contains all sorts of tracepoints.
+ */
dotraplinkage void notrace
-trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
- /*
- * The exception_enter and tracepoint processing could
- * trigger another page faults (user space callchain
- * reading) and destroy the original cr2 value, so read
- * the faulting address now.
- */
- unsigned long address = read_cr2();
+ unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;

prev_state = exception_enter();
- trace_page_fault_entries(address, regs, error_code);
+ if (trace_irqvectors_enabled())
+ trace_page_fault_entries(address, regs, error_code);
+
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
-NOKPROBE_SYMBOL(trace_do_page_fault);
-#endif /* CONFIG_TRACING */
+NOKPROBE_SYMBOL(do_page_fault);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 48ef7bb32c42..0092da1c056f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -891,14 +891,6 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,

#endif /* CONFIG_HYPERV */

-#ifdef CONFIG_TRACING
-ENTRY(trace_page_fault)
- ASM_CLAC
- pushl $trace_do_page_fault
- jmp common_exception
-END(trace_page_fault)
-#endif
-
ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ca0b250eefc4..dfabcbf8e813 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -913,17 +913,6 @@ ENTRY(\sym)
END(\sym)
.endm

-#ifdef CONFIG_TRACING
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#else
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#endif
-
idtentry divide_error do_divide_error has_error_code=0
idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
@@ -1091,7 +1080,7 @@ idtentry xen_stack_segment do_stack_segment has_error_code=1
#endif

idtentry general_protection do_general_protection has_error_code=1
-trace_idtentry page_fault do_page_fault has_error_code=1
+idtentry page_fault do_page_fault has_error_code=1

#ifdef CONFIG_KVM_GUEST
idtentry async_page_fault do_async_page_fault has_error_code=1
--
2.14.2

@ -0,0 +1,263 @@
From 12f71c3ef98c53a158abec93ef40cd15c9120284 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Aug 2017 08:47:37 +0200
Subject: [PATCH 027/231] x86/idt: Unify gate_struct handling for 32/64-bit
kernels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The first 32 bits of gate struct are the same for 32 and 64 bit kernels.

The 32-bit version uses desc_struct and no designated data structure,
so we need different accessors for 32 and 64 bit kernels.

Aside of that the macros which are necessary to build the 32-bit
gate descriptor are horrible to read.

Unify the gate structs and switch all code fiddling with it over.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170828064957.861974317@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 64b163fab684e3de47aa8db6cc08ae7d2e194373)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 587719b1926757eb7531e0631d63fb93cd60d0d3)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/desc.h | 45 ++++++++++++++-----------------
arch/x86/include/asm/desc_defs.h | 57 ++++++++++++++++++++++++++--------------
arch/x86/kvm/vmx.c | 2 +-
arch/x86/xen/enlighten_pv.c | 12 ++++-----
4 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d0a21b12dd58..57e502a4e92f 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -83,33 +83,25 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
}

-#ifdef CONFIG_X86_64
-
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
unsigned dpl, unsigned ist, unsigned seg)
{
- gate->offset_low = PTR_LOW(func);
+ gate->offset_low = (u16) func;
+ gate->bits.p = 1;
+ gate->bits.dpl = dpl;
+ gate->bits.zero = 0;
+ gate->bits.type = type;
+ gate->offset_middle = (u16) (func >> 16);
+#ifdef CONFIG_X86_64
gate->segment = __KERNEL_CS;
- gate->ist = ist;
- gate->p = 1;
- gate->dpl = dpl;
- gate->zero0 = 0;
- gate->zero1 = 0;
- gate->type = type;
- gate->offset_middle = PTR_MIDDLE(func);
- gate->offset_high = PTR_HIGH(func);
-}
-
+ gate->bits.ist = ist;
+ gate->reserved = 0;
+ gate->offset_high = (u32) (func >> 32);
#else
-static inline void pack_gate(gate_desc *gate, unsigned char type,
- unsigned long base, unsigned dpl, unsigned flags,
- unsigned short seg)
-{
- gate->a = (seg << 16) | (base & 0xffff);
- gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
-}
-
+ gate->segment = seg;
+ gate->bits.ist = 0;
#endif
+}

static inline int desc_empty(const void *ptr)
{
@@ -185,7 +177,8 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
}


-static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
+static inline void set_tssldt_descriptor(void *d, unsigned long addr,
+ unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
struct ldttss_desc64 *desc = d;
@@ -193,13 +186,13 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
memset(desc, 0, sizeof(*desc));

desc->limit0 = size & 0xFFFF;
- desc->base0 = PTR_LOW(addr);
- desc->base1 = PTR_MIDDLE(addr) & 0xFF;
+ desc->base0 = (u16) addr;
+ desc->base1 = (addr >> 16) & 0xFF;
desc->type = type;
desc->p = 1;
desc->limit1 = (size >> 16) & 0xF;
- desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
- desc->base3 = PTR_HIGH(addr);
+ desc->base2 = (addr >> 24) & 0xFF;
+ desc->base3 = (u32) (addr >> 32);
#else
pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
diff --git a/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 49265345d4d2..d684bee8a59a 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -47,20 +47,6 @@ enum {
GATE_TASK = 0x5,
};

-/* 16byte gate */
-struct gate_struct64 {
- u16 offset_low;
- u16 segment;
- unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
- u16 offset_middle;
- u32 offset_high;
- u32 zero1;
-} __attribute__((packed));
-
-#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)
-
enum {
DESC_TSS = 0x9,
DESC_LDT = 0x2,
@@ -77,20 +63,51 @@ struct ldttss_desc64 {
u32 zero1;
} __attribute__((packed));

+
#ifdef CONFIG_X86_64
-typedef struct gate_struct64 gate_desc;
typedef struct ldttss_desc64 ldt_desc;
typedef struct ldttss_desc64 tss_desc;
-#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32))
-#define gate_segment(g) ((g).segment)
#else
-typedef struct desc_struct gate_desc;
typedef struct desc_struct ldt_desc;
typedef struct desc_struct tss_desc;
-#define gate_offset(g) (((g).b & 0xffff0000) | ((g).a & 0x0000ffff))
-#define gate_segment(g) ((g).a >> 16)
#endif

+struct idt_bits {
+ u16 ist : 3,
+ zero : 5,
+ type : 5,
+ dpl : 2,
+ p : 1;
+} __attribute__((packed));
+
+struct gate_struct {
+ u16 offset_low;
+ u16 segment;
+ struct idt_bits bits;
+ u16 offset_middle;
+#ifdef CONFIG_X86_64
+ u32 offset_high;
+ u32 reserved;
+#endif
+} __attribute__((packed));
+
+typedef struct gate_struct gate_desc;
+
+static inline unsigned long gate_offset(const gate_desc *g)
+{
+#ifdef CONFIG_X86_64
+ return g->offset_low | ((unsigned long)g->offset_middle << 16) |
+ ((unsigned long) g->offset_high << 32);
+#else
+ return g->offset_low | ((unsigned long)g->offset_middle << 16);
+#endif
+}
+
+static inline unsigned long gate_segment(const gate_desc *g)
+{
+ return g->segment;
+}
+
struct desc_ptr {
unsigned short size;
unsigned long address;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a2c95522ac99..7b447d126d17 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8838,7 +8838,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)

vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
desc = (gate_desc *)vmx->host_idt_base + vector;
- entry = gate_offset(*desc);
+ entry = gate_offset(desc);
asm volatile(
#ifdef CONFIG_X86_64
"mov %%" _ASM_SP ", %[sp]\n\t"
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 6c279c8f0a0e..49ee3315b9f7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -591,12 +591,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
{
unsigned long addr;

- if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
+ if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
return 0;

info->vector = vector;

- addr = gate_offset(*val);
+ addr = gate_offset(val);
#ifdef CONFIG_X86_64
/*
* Look for known traps using IST, and substitute them
@@ -629,16 +629,16 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
;
else {
/* Some other trap using IST? */
- if (WARN_ON(val->ist != 0))
+ if (WARN_ON(val->bits.ist != 0))
return 0;
}
#endif /* CONFIG_X86_64 */
info->address = addr;

- info->cs = gate_segment(*val);
- info->flags = val->dpl;
+ info->cs = gate_segment(val);
+ info->flags = val->bits.dpl;
/* interrupt gates clear IF */
- if (val->type == GATE_INTERRUPT)
+ if (val->bits.type == GATE_INTERRUPT)
info->flags |= 1 << 2;

return 1;
--
2.14.2

@ -0,0 +1,93 @@
From a0b37d5a5f250199b6df4e9404d2071802591de6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Aug 2017 08:47:40 +0200
Subject: [PATCH 028/231] x86/asm: Replace access to desc_struct:a/b fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The union inside of desc_struct which allows access to the raw u32 parts of
the descriptors. This raw access part is about to go away.

Replace the few code parts which access those fields.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170828064958.120214366@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 9a98e7780022aa7cd201eb8a88a4f1d607b73cde)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 8469c76c61ea9c3b86b596352d1148bace5ea706)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/xen/hypercall.h | 6 ++++--
arch/x86/kernel/tls.c | 2 +-
arch/x86/xen/enlighten_pv.c | 2 +-
3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 11071fcd630e..9606688caa4b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -552,6 +552,8 @@ static inline void
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
{
+ u32 *p = (u32 *) &desc;
+
mcl->op = __HYPERVISOR_update_descriptor;
if (sizeof(maddr) == sizeof(long)) {
mcl->args[0] = maddr;
@@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
} else {
mcl->args[0] = maddr;
mcl->args[1] = maddr >> 32;
- mcl->args[2] = desc.a;
- mcl->args[3] = desc.b;
+ mcl->args[2] = *p++;
+ mcl->args[3] = *p;
}

trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index dcd699baea1b..a106b9719c58 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,7 +93,7 @@ static void set_tls_desc(struct task_struct *p, int idx,

while (n-- > 0) {
if (LDT_empty(info) || LDT_zero(info)) {
- desc->a = desc->b = 0;
+ memset(desc, 0, sizeof(*desc));
} else {
fill_ldt(desc, info);

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 49ee3315b9f7..c76f5ff4d0d7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -501,7 +501,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
static inline bool desc_equal(const struct desc_struct *d1,
const struct desc_struct *d2)
{
- return d1->a == d2->a && d1->b == d2->b;
+ return !memcmp(d1, d2, sizeof(*d1));
}

static void load_TLS_descriptor(struct thread_struct *t,
--
2.14.2

@ -0,0 +1,437 @@
|
||||
From e5688fb8c2c243658f3fe754d33c7250c8aed146 Mon Sep 17 00:00:00 2001
|
||||
From: Juergen Gross <jgross@suse.com>
|
||||
Date: Thu, 31 Aug 2017 19:42:49 +0200
|
||||
Subject: [PATCH 029/231] x86/xen: Get rid of paravirt op
|
||||
adjust_exception_frame
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CVE-2017-5754
|
||||
|
||||
When running as Xen pv-guest the exception frame on the stack contains
|
||||
%r11 and %rcx additional to the other data pushed by the processor.
|
||||
|
||||
Instead of having a paravirt op being called for each exception type
|
||||
prepend the Xen specific code to each exception entry. When running as
|
||||
Xen pv-guest just use the exception entry with prepended instructions,
|
||||
otherwise use the entry without the Xen specific code.
|
||||
|
||||
[ tglx: Merged through tip to avoid ugly merge conflict ]
|
||||
|
||||
Signed-off-by: Juergen Gross <jgross@suse.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: xen-devel@lists.xenproject.org
|
||||
Cc: boris.ostrovsky@oracle.com
|
||||
Cc: luto@amacapital.net
|
||||
Link: http://lkml.kernel.org/r/20170831174249.26853-1-jg@pfupf.net
|
||||
(backported from commit 5878d5d6fdef6447d73b0acc121ba445bef37f53)
|
||||
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
||||
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
||||
(cherry picked from commit 9a6fb927deb3ebbe831741ca82081714637181a7)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
arch/x86/include/asm/paravirt.h | 5 --
|
||||
arch/x86/include/asm/paravirt_types.h | 3 --
|
||||
arch/x86/include/asm/proto.h | 3 ++
|
||||
arch/x86/include/asm/traps.h | 28 ++++++++--
|
||||
arch/x86/xen/xen-ops.h | 1 -
|
||||
arch/x86/kernel/asm-offsets_64.c | 1 -
|
||||
arch/x86/kernel/paravirt.c | 3 --
|
||||
arch/x86/xen/enlighten_pv.c | 98 +++++++++++++++++++++++------------
|
||||
arch/x86/xen/irq.c | 3 --
|
||||
arch/x86/entry/entry_64.S | 23 ++------
|
||||
arch/x86/entry/entry_64_compat.S | 1 -
|
||||
arch/x86/xen/xen-asm_64.S | 41 +++++++++++++--
|
||||
12 files changed, 133 insertions(+), 77 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
|
||||
index 9ccac1926587..c25dd22f7c70 100644
|
||||
--- a/arch/x86/include/asm/paravirt.h
|
||||
+++ b/arch/x86/include/asm/paravirt.h
|
||||
@@ -960,11 +960,6 @@ extern void default_banner(void);
|
||||
#define GET_CR2_INTO_RAX \
|
||||
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
|
||||
|
||||
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
|
||||
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
|
||||
- CLBR_NONE, \
|
||||
- call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
|
||||
-
|
||||
#define USERGS_SYSRET64 \
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
|
||||
CLBR_NONE, \
|
||||
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
|
||||
index 9ffc36bfe4cd..6b64fc6367f2 100644
|
||||
--- a/arch/x86/include/asm/paravirt_types.h
|
||||
+++ b/arch/x86/include/asm/paravirt_types.h
|
||||
@@ -196,9 +196,6 @@ struct pv_irq_ops {
|
||||
void (*safe_halt)(void);
|
||||
void (*halt)(void);
|
||||
|
||||
-#ifdef CONFIG_X86_64
|
||||
- void (*adjust_exception_frame)(void);
|
||||
-#endif
|
||||
} __no_randomize_layout;
|
||||
|
||||
struct pv_mmu_ops {
|
||||
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
|
||||
index 8d3964fc5f91..b408b1886195 100644
|
||||
--- a/arch/x86/include/asm/proto.h
|
||||
+++ b/arch/x86/include/asm/proto.h
|
||||
@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
|
||||
void __end_entry_SYSENTER_compat(void);
|
||||
void entry_SYSCALL_compat(void);
|
||||
void entry_INT80_compat(void);
|
||||
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
|
||||
+void xen_entry_INT80_compat(void);
|
||||
+#endif
|
||||
#endif
|
||||
|
||||
void x86_configure_nx(void);
|
||||
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
|
||||
index b4f322d6c95f..feb89dbe359d 100644
|
||||
--- a/arch/x86/include/asm/traps.h
|
||||
+++ b/arch/x86/include/asm/traps.h
|
||||
@@ -13,9 +13,6 @@ asmlinkage void divide_error(void);
|
||||
asmlinkage void debug(void);
|
||||
asmlinkage void nmi(void);
|
||||
asmlinkage void int3(void);
|
||||
-asmlinkage void xen_debug(void);
|
||||
-asmlinkage void xen_int3(void);
|
||||
-asmlinkage void xen_stack_segment(void);
|
||||
asmlinkage void overflow(void);
|
||||
asmlinkage void bounds(void);
|
||||
asmlinkage void invalid_op(void);
|
||||
@@ -56,6 +53,31 @@ asmlinkage void simd_coprocessor_error(void);
|
||||
#define trace_page_fault page_fault
|
||||
#endif
|
||||
|
||||
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
|
||||
+asmlinkage void xen_divide_error(void);
|
||||
+asmlinkage void xen_xendebug(void);
|
||||
+asmlinkage void xen_xenint3(void);
|
||||
+asmlinkage void xen_nmi(void);
|
||||
+asmlinkage void xen_overflow(void);
|
||||
+asmlinkage void xen_bounds(void);
|
||||
+asmlinkage void xen_invalid_op(void);
|
||||
+asmlinkage void xen_device_not_available(void);
|
||||
+asmlinkage void xen_double_fault(void);
|
||||
+asmlinkage void xen_coprocessor_segment_overrun(void);
|
||||
+asmlinkage void xen_invalid_TSS(void);
|
||||
+asmlinkage void xen_segment_not_present(void);
|
||||
+asmlinkage void xen_stack_segment(void);
|
||||
+asmlinkage void xen_general_protection(void);
|
||||
+asmlinkage void xen_page_fault(void);
|
||||
+asmlinkage void xen_spurious_interrupt_bug(void);
|
||||
+asmlinkage void xen_coprocessor_error(void);
|
||||
+asmlinkage void xen_alignment_check(void);
|
||||
+#ifdef CONFIG_X86_MCE
|
||||
+asmlinkage void xen_machine_check(void);
|
||||
+#endif /* CONFIG_X86_MCE */
|
||||
+asmlinkage void xen_simd_coprocessor_error(void);
|
||||
+#endif
|
||||
+
|
||||
dotraplinkage void do_divide_error(struct pt_regs *, long);
|
||||
dotraplinkage void do_debug(struct pt_regs *, long);
|
||||
dotraplinkage void do_nmi(struct pt_regs *, long);
|
||||
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
|
||||
index 70301ac0d414..c8a6d224f7ed 100644
|
||||
--- a/arch/x86/xen/xen-ops.h
|
||||
+++ b/arch/x86/xen/xen-ops.h
|
||||
@@ -138,7 +138,6 @@ __visible void xen_restore_fl_direct(unsigned long);
|
||||
__visible void xen_iret(void);
|
||||
__visible void xen_sysret32(void);
|
||||
__visible void xen_sysret64(void);
|
||||
-__visible void xen_adjust_exception_frame(void);
|
||||
|
||||
extern int xen_panic_handler_init(void);
|
||||
|
||||
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
|
||||
index 99332f550c48..cf42206926af 100644
|
||||
--- a/arch/x86/kernel/asm-offsets_64.c
|
||||
+++ b/arch/x86/kernel/asm-offsets_64.c
|
||||
@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
|
||||
int main(void)
|
||||
{
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
- OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
|
||||
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
|
||||
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
|
||||
BLANK();
|
||||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
|
||||
index bc0a849589bb..a14df9eecfed 100644
|
||||
--- a/arch/x86/kernel/paravirt.c
|
||||
+++ b/arch/x86/kernel/paravirt.c
|
||||
@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
|
||||
.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
|
||||
.safe_halt = native_safe_halt,
|
||||
.halt = native_halt,
|
||||
-#ifdef CONFIG_X86_64
|
||||
- .adjust_exception_frame = paravirt_nop,
|
||||
-#endif
|
||||
};
|
||||
|
||||
__visible struct pv_cpu_ops pv_cpu_ops = {
|
||||
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
|
||||
index c76f5ff4d0d7..ae2a2e2d6362 100644
|
||||
--- a/arch/x86/xen/enlighten_pv.c
|
||||
+++ b/arch/x86/xen/enlighten_pv.c
|
||||
@@ -586,6 +586,70 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_X86_64
|
||||
+struct trap_array_entry {
|
||||
+ void (*orig)(void);
|
||||
+ void (*xen)(void);
|
||||
+ bool ist_okay;
|
||||
+};
|
||||
+
|
||||
+static struct trap_array_entry trap_array[] = {
|
||||
+ { debug, xen_xendebug, true },
|
||||
+ { int3, xen_xenint3, true },
|
||||
+ { double_fault, xen_double_fault, true },
|
||||
+#ifdef CONFIG_X86_MCE
|
||||
+ { machine_check, xen_machine_check, true },
|
||||
+#endif
|
||||
+ { nmi, xen_nmi, true },
|
||||
+ { overflow, xen_overflow, false },
|
||||
+#ifdef CONFIG_IA32_EMULATION
|
||||
+ { entry_INT80_compat, xen_entry_INT80_compat, false },
|
||||
+#endif
|
||||
+ { page_fault, xen_page_fault, false },
|
||||
+ { divide_error, xen_divide_error, false },
|
||||
+ { bounds, xen_bounds, false },
|
||||
+ { invalid_op, xen_invalid_op, false },
|
||||
+ { device_not_available, xen_device_not_available, false },
|
||||
+ { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
|
||||
+ { invalid_TSS, xen_invalid_TSS, false },
|
||||
+ { segment_not_present, xen_segment_not_present, false },
|
||||
+ { stack_segment, xen_stack_segment, false },
|
||||
+ { general_protection, xen_general_protection, false },
|
||||
+ { spurious_interrupt_bug, xen_spurious_interrupt_bug, false },
|
||||
+ { coprocessor_error, xen_coprocessor_error, false },
|
||||
+ { alignment_check, xen_alignment_check, false },
|
||||
+ { simd_coprocessor_error, xen_simd_coprocessor_error, false },
|
||||
+};
|
||||
+
|
||||
+static bool get_trap_addr(void **addr, unsigned int ist)
|
||||
+{
|
||||
+ unsigned int nr;
|
||||
+ bool ist_okay = false;
|
||||
+
|
||||
+ /*
|
||||
+ * Replace trap handler addresses by Xen specific ones.
|
||||
+ * Check for known traps using IST and whitelist them.
|
||||
+ * The debugger ones are the only ones we care about.
|
||||
+ * Xen will handle faults like double_fault, * so we should never see
+ * them. Warn if there's an unexpected IST-using fault handler.
+ */
+ for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
+ struct trap_array_entry *entry = trap_array + nr;
+
+ if (*addr == entry->orig) {
+ *addr = entry->xen;
+ ist_okay = entry->ist_okay;
+ break;
+ }
+ }
+
+ if (WARN_ON(ist != 0 && !ist_okay))
+ return false;
+
+ return true;
+}
+#endif
+
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
{
@@ -598,40 +662,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,

addr = gate_offset(val);
#ifdef CONFIG_X86_64
- /*
- * Look for known traps using IST, and substitute them
- * appropriately. The debugger ones are the only ones we care
- * about. Xen will handle faults like double_fault,
- * so we should never see them. Warn if
- * there's an unexpected IST-using fault handler.
- */
- if (addr == (unsigned long)debug)
- addr = (unsigned long)xen_debug;
- else if (addr == (unsigned long)int3)
- addr = (unsigned long)xen_int3;
- else if (addr == (unsigned long)stack_segment)
- addr = (unsigned long)xen_stack_segment;
- else if (addr == (unsigned long)double_fault) {
- /* Don't need to handle these */
+ if (!get_trap_addr((void **)&addr, val->bits.ist))
return 0;
-#ifdef CONFIG_X86_MCE
- } else if (addr == (unsigned long)machine_check) {
- /*
- * when xen hypervisor inject vMCE to guest,
- * use native mce handler to handle it
- */
- ;
-#endif
- } else if (addr == (unsigned long)nmi)
- /*
- * Use the native version as well.
- */
- ;
- else {
- /* Some other trap using IST? */
- if (WARN_ON(val->bits.ist != 0))
- return 0;
- }
#endif /* CONFIG_X86_64 */
info->address = addr;

diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 33e92955e09d..d4eff5676cfa 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {

.safe_halt = xen_safe_halt,
.halt = xen_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = xen_adjust_exception_frame,
-#endif
};

void __init xen_init_irq_ops(void)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index dfabcbf8e813..c12260ef3e4b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -829,7 +829,6 @@ ENTRY(\sym)
.endif

ASM_CLAC
- PARAVIRT_ADJUST_EXCEPTION_FRAME

.ifeq \has_error_code
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -975,7 +974,7 @@ ENTRY(do_softirq_own_stack)
ENDPROC(do_softirq_own_stack)

#ifdef CONFIG_XEN
-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0

/*
* A note on the "critical region" in our callback handler.
@@ -1042,8 +1041,6 @@ ENTRY(xen_failsafe_callback)
movq 8(%rsp), %r11
addq $0x30, %rsp
pushq $0 /* RIP */
- pushq %r11
- pushq %rcx
UNWIND_HINT_IRET_REGS offset=8
jmp general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1074,9 +1071,8 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1

#ifdef CONFIG_XEN
-idtentry xen_debug do_debug has_error_code=0
-idtentry xen_int3 do_int3 has_error_code=0
-idtentry xen_stack_segment do_stack_segment has_error_code=1
+idtentry xendebug do_debug has_error_code=0
+idtentry xenint3 do_int3 has_error_code=0
#endif

idtentry general_protection do_general_protection has_error_code=1
@@ -1240,20 +1236,9 @@ ENTRY(error_exit)
END(error_exit)

/* Runs on exception stack */
+/* XXX: broken on Xen PV */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
- /*
- * Fix up the exception frame if we're on Xen.
- * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
- * one value to the stack on native, so it may clobber the rdx
- * scratch slot, but it won't clobber any of the important
- * slots past it.
- *
- * Xen is a different story, because the Xen frame itself overlaps
- * the "NMI executing" variable.
- */
- PARAVIRT_ADJUST_EXCEPTION_FRAME
-
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 5314d7b8e5ad..d8468ba24be0 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -293,7 +293,6 @@ ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
*/
- PARAVIRT_ADJUST_EXCEPTION_FRAME
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS

diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 3a3b6a211584..dae2cc33afb5 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -16,11 +16,42 @@

#include <linux/linkage.h>

-ENTRY(xen_adjust_exception_frame)
- mov 8+0(%rsp), %rcx
- mov 8+8(%rsp), %r11
- ret $16
-ENDPROC(xen_adjust_exception_frame)
+.macro xen_pv_trap name
+ENTRY(xen_\name)
+ pop %rcx
+ pop %r11
+ jmp \name
+END(xen_\name)
+.endm
+
+xen_pv_trap divide_error
+xen_pv_trap debug
+xen_pv_trap xendebug
+xen_pv_trap int3
+xen_pv_trap xenint3
+xen_pv_trap nmi
+xen_pv_trap overflow
+xen_pv_trap bounds
+xen_pv_trap invalid_op
+xen_pv_trap device_not_available
+xen_pv_trap double_fault
+xen_pv_trap coprocessor_segment_overrun
+xen_pv_trap invalid_TSS
+xen_pv_trap segment_not_present
+xen_pv_trap stack_segment
+xen_pv_trap general_protection
+xen_pv_trap page_fault
+xen_pv_trap spurious_interrupt_bug
+xen_pv_trap coprocessor_error
+xen_pv_trap alignment_check
+#ifdef CONFIG_X86_MCE
+xen_pv_trap machine_check
+#endif /* CONFIG_X86_MCE */
+xen_pv_trap simd_coprocessor_error
+#ifdef CONFIG_IA32_EMULATION
+xen_pv_trap entry_INT80_compat
+#endif
+xen_pv_trap hypervisor_callback

hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
--
2.14.2

@ -0,0 +1,391 @@
From bbb647f65a627420f8c3351b34d14490a9878509 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 4 Sep 2017 12:25:27 +0200
Subject: [PATCH 030/231] x86/paravirt: Remove no longer used paravirt
functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

With removal of lguest some of the paravirt functions are no longer
needed:

->read_cr4()
->store_idt()
->set_pmd_at()
->set_pud_at()
->pte_update()

Remove them.

Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akataria@vmware.com
Cc: boris.ostrovsky@oracle.com
Cc: chrisw@sous-sol.org
Cc: jeremy@goop.org
Cc: rusty@rustcorp.com.au
Cc: virtualization@lists.linux-foundation.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20170904102527.25409-1-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 87930019c713873a1c3b9bd55dde46e81f70c8f1)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit edf3ab0080a6e79a300753e66929b0b7499eaec5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/desc.h | 3 +--
arch/x86/include/asm/paravirt.h | 37 -----------------------------------
arch/x86/include/asm/paravirt_types.h | 9 ---------
arch/x86/include/asm/pgtable.h | 27 ++++---------------------
arch/x86/include/asm/special_insns.h | 10 +++++-----
arch/x86/kernel/paravirt.c | 5 -----
arch/x86/kvm/vmx.c | 2 +-
arch/x86/mm/pgtable.c | 7 +------
arch/x86/xen/enlighten_pv.c | 2 --
arch/x86/xen/mmu_pv.c | 2 --
10 files changed, 12 insertions(+), 92 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 57e502a4e92f..f995e5a09136 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -120,7 +120,6 @@ static inline int desc_empty(const void *ptr)
#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))

#define store_gdt(dtr) native_store_gdt(dtr)
-#define store_idt(dtr) native_store_idt(dtr)
#define store_tr(tr) (tr = native_store_tr())

#define load_TLS(t, cpu) native_load_tls(t, cpu)
@@ -241,7 +240,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
asm volatile("sgdt %0":"=m" (*dtr));
}

-static inline void native_store_idt(struct desc_ptr *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
{
asm volatile("sidt %0":"=m" (*dtr));
}
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25dd22f7c70..12deec722cf0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

-static inline unsigned long __read_cr4(void)
-{
- return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
-}
-
static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
-static inline void store_idt(struct desc_ptr *dtr)
-{
- PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
-}
static inline unsigned long paravirt_store_tr(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
}

-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
-}
-
static inline pte_t __pte(pteval_t val)
{
pteval_t ret;
@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}

-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
-{
- if (sizeof(pmdval_t) > sizeof(long))
- /* 5 arg words */
- pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
- else
- PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
- native_pmd_val(pmd));
-}
-
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
-{
- if (sizeof(pudval_t) > sizeof(long))
- /* 5 arg words */
- pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
- else
- PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
- native_pud_val(pud));
-}
-
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 6b64fc6367f2..42873edd9f9d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -107,7 +107,6 @@ struct pv_cpu_ops {
unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long);

- unsigned long (*read_cr4)(void);
void (*write_cr4)(unsigned long);

#ifdef CONFIG_X86_64
@@ -119,8 +118,6 @@ struct pv_cpu_ops {
void (*load_tr_desc)(void);
void (*load_gdt)(const struct desc_ptr *);
void (*load_idt)(const struct desc_ptr *);
- /* store_gdt has been removed. */
- void (*store_idt)(struct desc_ptr *);
void (*set_ldt)(const void *desc, unsigned entries);
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -245,12 +242,6 @@ struct pv_mmu_ops {
void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval);
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
- void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmdval);
- void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pudval);
- void (*pte_update)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);

pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 77037b6f1caa..bb8e9ea7deb4 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -43,8 +43,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#else /* !CONFIG_PARAVIRT */
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
-#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
-#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)

#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
@@ -75,8 +73,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
#define pmd_clear(pmd) native_pmd_clear(pmd)

-#define pte_update(mm, addr, ptep) do { } while (0)
-
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)

@@ -965,31 +961,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
native_set_pte(ptep, pte);
}

-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp , pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
{
native_set_pmd(pmdp, pmd);
}

-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
{
native_set_pud(pudp, pud);
}

-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces. It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.
- */
-#define pte_update(mm, addr, ptep) do { } while (0)
-#endif
-
/*
* We only update the dirty/accessed state if we set
* the dirty bit by hand in the kernel, since the hardware
@@ -1017,7 +1000,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
- pte_update(mm, addr, ptep);
return pte;
}

@@ -1044,7 +1026,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
- pte_update(mm, addr, ptep);
}

#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 9efaabf5b54b..a24dfcf79f4a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)

extern asmlinkage void native_load_gs_index(unsigned);

+static inline unsigned long __read_cr4(void)
+{
+ return native_read_cr4();
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
native_write_cr3(x);
}

-static inline unsigned long __read_cr4(void)
-{
- return native_read_cr4();
-}
-
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a14df9eecfed..19a3e8f961c7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.set_debugreg = native_set_debugreg,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
- .read_cr4 = native_read_cr4,
.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
.read_cr8 = native_read_cr8,
@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
.load_idt = native_load_idt,
- .store_idt = native_store_idt,
.store_tr = native_store_tr,
.load_tls = native_load_tls,
#ifdef CONFIG_X86_64
@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.set_pte = native_set_pte,
.set_pte_at = native_set_pte_at,
.set_pmd = native_set_pmd,
- .set_pmd_at = native_set_pmd_at,
- .pte_update = paravirt_nop,

.ptep_modify_prot_start = __ptep_modify_prot_start,
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
- .set_pud_at = native_set_pud_at,

.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7b447d126d17..dd4996a96c71 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5174,7 +5174,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */

- native_store_idt(&dt);
+ store_idt(&dt);
vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
vmx->host_idt_base = dt.address;

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 508a708eb9a6..942391b5b639 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
{
int changed = !pte_same(*ptep, entry);

- if (changed && dirty) {
+ if (changed && dirty)
*ptep = entry;
- pte_update(vma->vm_mm, address, ptep);
- }

return changed;
}
@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
(unsigned long *) &ptep->pte);

- if (ret)
- pte_update(vma->vm_mm, addr, ptep);
-
return ret;
}

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae2a2e2d6362..69b9deff7e5c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_cr0 = xen_read_cr0,
.write_cr0 = xen_write_cr0,

- .read_cr4 = native_read_cr4,
.write_cr4 = xen_write_cr4,

#ifdef CONFIG_X86_64
@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,

- .store_idt = native_store_idt,
.store_tr = xen_store_tr,

.write_ldt_entry = xen_write_ldt_entry,
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index cab28cf2cffb..5f61b7e2e6b2 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2430,8 +2430,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.flush_tlb_single = xen_flush_tlb_single,
.flush_tlb_others = xen_flush_tlb_others,

- .pte_update = paravirt_nop,
-
.pgd_alloc = xen_pgd_alloc,
.pgd_free = xen_pgd_free,

--
2.14.2

patches/kernel/0031-x86-entry-Fix-idtentry-unwind-hint.patch
@ -0,0 +1,53 @@
From b368fed558634ffc92dba0d7d9e4e631d26cd92f Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 20 Oct 2017 11:21:33 -0500
Subject: [PATCH 031/231] x86/entry: Fix idtentry unwind hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This fixes the following ORC warning in the 'int3' entry code:

WARNING: can't dereference iret registers at ffff8801c5f17fe0 for ip ffffffff95f0d94b

The ORC metadata had the wrong stack offset for the iret registers.

Their location on the stack is dependent on whether the exception has an
error code.

Reported-and-tested-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations")
Link: http://lkml.kernel.org/r/931d57f0551ed7979d5e7e05370d445c8e5137f8.1508516398.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 98990a33b77dda9babf91cb235654f6729e5702e)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 266be2a5053230f6d0b6f27d3e8e9f28df40dd7e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c12260ef3e4b..2e4fc6425f47 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -821,7 +821,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt

.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
- UNWIND_HINT_IRET_REGS offset=8
+ UNWIND_HINT_IRET_REGS offset=\has_error_code*8

/* Sanity check */
.if \shift_ist != -1 && \paranoid == 0
--
2.14.2

patches/kernel/0032-x86-mm-64-Initialize-CR4.PCIDE-early.patch
@ -0,0 +1,237 @@
From d9fd6653e5dd9d80c7c75065329250529281e02d Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 10 Sep 2017 17:48:27 -0700
Subject: [PATCH 032/231] x86/mm/64: Initialize CR4.PCIDE early
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

cpu_init() is weird: it's called rather late (after early
identification and after most MMU state is initialized) on the boot
CPU but is called extremely early (before identification) on secondary
CPUs. It's called just late enough on the boot CPU that its CR4 value
isn't propagated to mmu_cr4_features.

Even if we put CR4.PCIDE into mmu_cr4_features, we'd hit two
problems. First, we'd crash in the trampoline code. That's
fixable, and I tried that. It turns out that mmu_cr4_features is
totally ignored by secondary_start_64(), though, so even with the
trampoline code fixed, it wouldn't help.

This means that we don't currently have CR4.PCIDE reliably initialized
before we start playing with cpu_tlbstate. This is very fragile and
tends to cause boot failures if I make even small changes to the TLB
handling code.

Make it more robust: initialize CR4.PCIDE earlier on the boot CPU
and propagate it to secondary CPUs in start_secondary().

( Yes, this is ugly. I think we should have improved mmu_cr4_features
to actually control CR4 during secondary bootup, but that would be
fairly intrusive at this stage. )

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reported-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Tested-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit c7ad5ad297e644601747d6dbee978bf85e14f7bc)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0e6a37a43aa876327e7d21881c09977da2d5c270)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/common.c | 49 +++++++-------------------------------------
arch/x86/kernel/setup.c | 5 ++++-
arch/x86/kernel/smpboot.c | 8 +++++---
arch/x86/mm/init.c | 34 ++++++++++++++++++++++++++++++
4 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0b80ed14ff52..4be7b209a3d6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
__setup("nompx", x86_mpx_setup);

#ifdef CONFIG_X86_64
-static int __init x86_pcid_setup(char *s)
+static int __init x86_nopcid_setup(char *s)
{
- /* require an exact match without trailing characters */
- if (strlen(s))
- return 0;
+ /* nopcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;

/* do not emit a message if the feature is not present */
if (!boot_cpu_has(X86_FEATURE_PCID))
- return 1;
+ return 0;

setup_clear_cpu_cap(X86_FEATURE_PCID);
pr_info("nopcid: PCID feature disabled\n");
- return 1;
+ return 0;
}
-__setup("nopcid", x86_pcid_setup);
+early_param("nopcid", x86_nopcid_setup);
#endif

static int __init x86_noinvpcid_setup(char *s)
@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}

-static void setup_pcid(struct cpuinfo_x86 *c)
-{
- if (cpu_has(c, X86_FEATURE_PCID)) {
- if (cpu_has(c, X86_FEATURE_PGE)) {
- /*
- * We'd like to use cr4_set_bits_and_update_boot(),
- * but we can't. CR4.PCIDE is special and can only
- * be set in long mode, and the early CPU init code
- * doesn't know this and would try to restore CR4.PCIDE
- * prior to entering long mode.
- *
- * Instead, we rely on the fact that hotplug, resume,
- * etc all fully restore CR4 before they write anything
- * that could have nonzero PCID bits to CR3. CR4.PCIDE
- * has no effect on the page tables themselves, so we
- * don't need it to be restored early.
- */
- cr4_set_bits(X86_CR4_PCIDE);
- } else {
- /*
- * flush_tlb_all(), as currently implemented, won't
- * work if PCID is on but PGE is not. Since that
- * combination doesn't exist on real hardware, there's
- * no reason to try to fully support it, but it's
- * polite to avoid corrupting data if we're on
- * an improperly configured VM.
- */
- clear_cpu_cap(c, X86_FEATURE_PCID);
- }
- }
-}
-
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smep(c);
setup_smap(c);

- /* Set up PCID */
- setup_pcid(c);
-
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d7e8b983aa72..f964bfddfefd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1174,8 +1174,11 @@ void __init setup_arch(char **cmdline_p)
* with the current CR4 value. This may not be necessary, but
* auditing all the early-boot CR4 manipulation would be needed to
* rule it out.
+ *
+ * Mask off features that don't work outside long mode (just
+ * PCIDE for now).
*/
- mmu_cr4_features = __read_cr4();
+ mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;

memblock_set_current_limit(get_max_mapped());

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 893fd8c849e2..d05006f6c31c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -227,10 +227,12 @@ static int enable_start_cpu0;
static void notrace start_secondary(void *unused)
{
/*
- * Don't put *anything* before cpu_init(), SMP booting is too
- * fragile that we want to limit the things done here to the
- * most necessary things.
+ * Don't put *anything* except direct CPU state initialization
+ * before cpu_init(), SMP booting is too fragile that we want to
+ * limit the things done here to the most necessary things.
*/
+ if (boot_cpu_has(X86_FEATURE_PCID))
+ __write_cr4(__read_cr4() | X86_CR4_PCIDE);
cpu_init();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bf3f1065d6ad..df2624b091a7 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -19,6 +19,7 @@
#include <asm/microcode.h>
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
+#include <asm/cpufeature.h>

/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
}
}

+static void setup_pcid(void)
+{
+#ifdef CONFIG_X86_64
+ if (boot_cpu_has(X86_FEATURE_PCID)) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
+ /*
+ * This can't be cr4_set_bits_and_update_boot() --
+ * the trampoline code can't handle CR4.PCIDE and
+ * it wouldn't do any good anyway. Despite the name,
+ * cr4_set_bits_and_update_boot() doesn't actually
+ * cause the bits in question to remain set all the
+ * way through the secondary boot asm.
+ *
+ * Instead, we brute-force it and set CR4.PCIDE
+ * manually in start_secondary().
+ */
+ cr4_set_bits(X86_CR4_PCIDE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ }
+ }
+#endif
+}
+
#ifdef CONFIG_X86_32
#define NR_RANGE_MR 3
#else /* CONFIG_X86_64 */
@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
unsigned long end;

probe_page_size_mask();
+ setup_pcid();

#ifdef CONFIG_X86_64
end = max_pfn << PAGE_SHIFT;
--
2.14.2

patches/kernel/0033-objtool-Add-ORC-unwind-table-generation.patch
File diff suppressed because it is too large
@ -0,0 +1,641 @@
From 338c7d8678b82c46668ce3b73f7339f71ab69cc8 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 11 Jul 2017 10:33:43 -0500
Subject: [PATCH 034/231] objtool, x86: Add facility for asm code to provide
unwind hints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Some asm (and inline asm) code does special things to the stack which
objtool can't understand. (Nor can GCC or GNU assembler, for that
matter.) In such cases we need a facility for the code to provide
annotations, so the unwinder can unwind through it.

This provides such a facility, in the form of unwind hints. They're
similar to the GNU assembler .cfi* directives, but they give more
information, and are needed in far fewer places, because objtool can
fill in the blanks by following branches and adjusting the stack pointer
for pushes and pops.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/0f5f3c9104fca559ff4088bece1d14ae3bca52d5.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 39358a033b2e4432052265c1fa0f36f572d8cfb5)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a1fed2e10e84d48643a09861c2d127968621813e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
tools/objtool/Makefile | 3 +
arch/x86/include/asm/orc_types.h | 107 ++++++++++++++++++++
arch/x86/include/asm/unwind_hints.h | 103 +++++++++++++++++++
tools/objtool/check.h | 4 +-
tools/objtool/orc_types.h | 22 +++++
tools/objtool/check.c | 191 +++++++++++++++++++++++++++++++++---
6 files changed, 417 insertions(+), 13 deletions(-)
create mode 100644 arch/x86/include/asm/orc_types.h
create mode 100644 arch/x86/include/asm/unwind_hints.h

diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 0e2765e243c0..3a6425fefc43 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -52,6 +52,9 @@ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
|| echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
+ @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
+ diff ../../arch/x86/include/asm/orc_types.h orc_types.h >/dev/null) \
+ || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true
$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@


diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
new file mode 100644
index 000000000000..7dc777a6cb40
--- /dev/null
+++ b/arch/x86/include/asm/orc_types.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and BP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED 0
+#define ORC_REG_PREV_SP 1
+#define ORC_REG_DX 2
+#define ORC_REG_DI 3
+#define ORC_REG_BP 4
+#define ORC_REG_SP 5
+#define ORC_REG_R10 6
+#define ORC_REG_R13 7
+#define ORC_REG_BP_INDIRECT 8
+#define ORC_REG_SP_INDIRECT 9
+#define ORC_REG_MAX 15
+
+/*
+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
+ * caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+ * points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
+ */
+#define ORC_TYPE_CALL 0
+#define ORC_TYPE_REGS 1
+#define ORC_TYPE_REGS_IRET 2
+#define UNWIND_HINT_TYPE_SAVE 3
+#define UNWIND_HINT_TYPE_RESTORE 4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard. It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the
+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
+ * the stack for a given code address. Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+ s16 sp_offset;
+ s16 bp_offset;
+ unsigned sp_reg:4;
+ unsigned bp_reg:4;
+ unsigned type:2;
+};
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..5e02b11c9b86
--- /dev/null
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -0,0 +1,103 @@
+#ifndef _ASM_X86_UNWIND_HINTS_H
+#define _ASM_X86_UNWIND_HINTS_H
+
+#include "orc_types.h"
+
+#ifdef __ASSEMBLY__
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest. The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack. Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
+ * category. In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer. Such
+ * code needs to be annotated such that objtool can understand it. The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction. Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
+#ifdef CONFIG_STACK_VALIDATION
+.Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+ .long .Lunwind_hint_ip_\@ - .
+ .short \sp_offset
+ .byte \sp_reg
+ .byte \type
+ .popsection
+#endif
+.endm
+
+.macro UNWIND_HINT_EMPTY
+ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
+ .if \base == %rsp && \indirect
+ .set sp_reg, ORC_REG_SP_INDIRECT
+ .elseif \base == %rsp
+ .set sp_reg, ORC_REG_SP
+ .elseif \base == %rbp
+ .set sp_reg, ORC_REG_BP
+ .elseif \base == %rdi
+ .set sp_reg, ORC_REG_DI
+ .elseif \base == %rdx
+ .set sp_reg, ORC_REG_DX
+ .elseif \base == %r10
+ .set sp_reg, ORC_REG_R10
+ .else
+ .error "UNWIND_HINT_REGS: bad base register"
+ .endif
+
+ .set sp_offset, \offset
+
+ .if \iret
+ .set type, ORC_TYPE_REGS_IRET
+ .elseif \extra == 0
+ .set type, ORC_TYPE_REGS_IRET
+ .set sp_offset, \offset + (16*8)
+ .else
+ .set type, ORC_TYPE_REGS
+ .endif
+
+ UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+.endm
+
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
+ UNWIND_HINT_REGS base=\base offset=\offset iret=1
+.endm
+
+.macro UNWIND_HINT_FUNC sp_offset=8
+ UNWIND_HINT sp_offset=\sp_offset
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define UNWIND_HINT(sp_reg, sp_offset, type) \
+ "987: \n\t" \
+ ".pushsection .discard.unwind_hints\n\t" \
+ /* struct unwind_hint */ \
+ ".long 987b - .\n\t" \
+ ".short " __stringify(sp_offset) "\n\t" \
+ ".byte " __stringify(sp_reg) "\n\t" \
+ ".byte " __stringify(type) "\n\t" \
+ ".popsection\n\t"
+
+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
+
+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 046874bbe226..ac3d4b13f17b 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -43,7 +43,7 @@ struct instruction {
unsigned int len;
unsigned char type;
unsigned long immediate;
- bool alt_group, visited, dead_end, ignore;
+ bool alt_group, visited, dead_end, ignore, hint, save, restore;
struct symbol *call_dest;
struct instruction *jump_dest;
struct list_head alts;
@@ -58,7 +58,7 @@ struct objtool_file {
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 16);
struct section *rodata, *whitelist;
- bool ignore_unreachables, c_file;
+ bool ignore_unreachables, c_file, hints;
};

int check(const char *objname, bool nofp, bool orc);
diff --git a/tools/objtool/orc_types.h b/tools/objtool/orc_types.h
index fc5cf6cffd9a..9c9dc579bd7d 100644
--- a/tools/objtool/orc_types.h
+++ b/tools/objtool/orc_types.h
@@ -61,11 +61,19 @@
*
* ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
* points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
*/
#define ORC_TYPE_CALL 0
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
+#define UNWIND_HINT_TYPE_SAVE 3
+#define UNWIND_HINT_TYPE_RESTORE 4

+#ifndef __ASSEMBLY__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
@@ -82,4 +90,18 @@ struct orc_entry {
unsigned type:2;
} __packed;

+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+};
+#endif /* __ASSEMBLY__ */
+
#endif /* _ORC_TYPES_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index cb57c526ba17..368275de5f23 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -100,7 +100,6 @@ static bool gcov_enabled(struct objtool_file *file)
static bool ignore_func(struct objtool_file *file, struct symbol *func)
{
struct rela *rela;
- struct instruction *insn;

/* check for STACK_FRAME_NON_STANDARD */
if (file->whitelist && file->whitelist->rela)
@@ -113,11 +112,6 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func)
return true;
}

- /* check if it has a context switching instruction */
- func_for_each_insn(file, func, insn)
- if (insn->type == INSN_CONTEXT_SWITCH)
- return true;
-
return false;
}

@@ -879,6 +873,99 @@ static int add_switch_table_alts(struct objtool_file *file)
return 0;
}

+static int read_unwind_hints(struct objtool_file *file)
+{
+ struct section *sec, *relasec;
+ struct rela *rela;
+ struct unwind_hint *hint;
+ struct instruction *insn;
+ struct cfi_reg *cfa;
+ int i;
+
+ sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+ if (!sec)
+ return 0;
+
+ relasec = sec->rela;
+ if (!relasec) {
+ WARN("missing .rela.discard.unwind_hints section");
+ return -1;
+ }
+
+ if (sec->len % sizeof(struct unwind_hint)) {
+ WARN("struct unwind_hint size mismatch");
+ return -1;
+ }
+
+ file->hints = true;
+
+ for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
+ hint = (struct unwind_hint *)sec->data->d_buf + i;
+
+ rela = find_rela_by_dest(sec, i * sizeof(*hint));
+ if (!rela) {
+ WARN("can't find rela for unwind_hints[%d]", i);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("can't find insn for unwind_hints[%d]", i);
+ return -1;
+ }
+
+ cfa = &insn->state.cfa;
+
+ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
+ insn->save = true;
+ continue;
+
+ } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
+ insn->restore = true;
+ insn->hint = true;
+ continue;
+ }
+
+ insn->hint = true;
+
+ switch (hint->sp_reg) {
+ case ORC_REG_UNDEFINED:
+ cfa->base = CFI_UNDEFINED;
+ break;
+ case ORC_REG_SP:
+ cfa->base = CFI_SP;
+ break;
+ case ORC_REG_BP:
+ cfa->base = CFI_BP;
+ break;
+ case ORC_REG_SP_INDIRECT:
+ cfa->base = CFI_SP_INDIRECT;
+ break;
+ case ORC_REG_R10:
+ cfa->base = CFI_R10;
+ break;
+ case ORC_REG_R13:
+ cfa->base = CFI_R13;
+ break;
+ case ORC_REG_DI:
+ cfa->base = CFI_DI;
+ break;
+ case ORC_REG_DX:
+ cfa->base = CFI_DX;
+ break;
+ default:
+ WARN_FUNC("unsupported unwind_hint sp base reg %d",
+ insn->sec, insn->offset, hint->sp_reg);
+ return -1;
+ }
+
+ cfa->offset = hint->sp_offset;
+ insn->state.type = hint->type;
+ }
+
+ return 0;
+}
+
static int decode_sections(struct objtool_file *file)
{
int ret;
@@ -909,6 +996,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;

+ ret = read_unwind_hints(file);
+ if (ret)
+ return ret;
+
return 0;
}

@@ -1382,7 +1473,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
struct insn_state state)
{
struct alternative *alt;
- struct instruction *insn;
+ struct instruction *insn, *next_insn;
struct section *sec;
struct symbol *func = NULL;
int ret;
@@ -1397,6 +1488,8 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
}

while (1) {
+ next_insn = next_insn_same_sec(file, insn);
+
if (file->c_file && insn->func) {
if (func && func != insn->func) {
WARN("%s() falls through to next function %s()",
@@ -1414,13 +1507,54 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
}

if (insn->visited) {
- if (!!insn_state_match(insn, &state))
+ if (!insn->hint && !insn_state_match(insn, &state))
return 1;

return 0;
}

- insn->state = state;
+ if (insn->hint) {
+ if (insn->restore) {
+ struct instruction *save_insn, *i;
+
+ i = insn;
+ save_insn = NULL;
+ func_for_each_insn_continue_reverse(file, func, i) {
+ if (i->save) {
+ save_insn = i;
+ break;
+ }
+ }
+
+ if (!save_insn) {
+ WARN_FUNC("no corresponding CFI save for CFI restore",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (!save_insn->visited) {
+ /*
+ * Oops, no state to copy yet.
+ * Hopefully we can reach this
+ * instruction from another branch
+ * after the save insn has been
+ * visited.
+ */
+ if (insn == first)
+ return 0;
+
+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
+ sec, insn->offset);
+ return 1;
+ }
+
+ insn->state = save_insn->state;
+ }
+
+ state = insn->state;
+
+ } else
+ insn->state = state;

insn->visited = true;

@@ -1497,6 +1631,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,

return 0;

+ case INSN_CONTEXT_SWITCH:
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_FUNC("unsupported instruction in callable function",
+ sec, insn->offset);
+ return 1;
+ }
+ return 0;
+
case INSN_STACK:
if (update_insn_state(insn, &state))
return -1;
@@ -1510,7 +1652,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
if (insn->dead_end)
return 0;

- insn = next_insn_same_sec(file, insn);
+ insn = next_insn;
if (!insn) {
WARN("%s: unexpected end of section", sec->name);
return 1;
@@ -1520,6 +1662,27 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
return 0;
}

+static int validate_unwind_hints(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int ret, warnings = 0;
+ struct insn_state state;
+
+ if (!file->hints)
+ return 0;
+
+ clear_insn_state(&state);
+
+ for_each_insn(file, insn) {
+ if (insn->hint && !insn->visited) {
+ ret = validate_branch(file, insn, state);
+ warnings += ret;
+ }
+ }
+
+ return warnings;
+}
+
static bool is_kasan_insn(struct instruction *insn)
{
return (insn->type == INSN_CALL &&
@@ -1665,8 +1828,9 @@ int check(const char *_objname, bool _nofp, bool orc)
hash_init(file.insn_hash);
file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
file.rodata = find_section_by_name(file.elf, ".rodata");
- file.ignore_unreachables = false;
file.c_file = find_section_by_name(file.elf, ".comment");
+ file.ignore_unreachables = false;
+ file.hints = false;

arch_initial_func_cfi_state(&initial_func_cfi);

@@ -1683,6 +1847,11 @@ int check(const char *_objname, bool _nofp, bool orc)
goto out;
warnings += ret;

+ ret = validate_unwind_hints(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
if (!warnings) {
ret = validate_reachable_instructions(&file);
if (ret < 0)
--
2.14.2

1407
patches/kernel/0035-x86-unwind-Add-the-ORC-unwinder.patch
Normal file
1407
patches/kernel/0035-x86-unwind-Add-the-ORC-unwinder.patch
Normal file
File diff suppressed because it is too large
Load Diff
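
The suppressed patch above adds the ORC unwinder itself. As a reading aid only -- this is a simplified sketch of the general ORC design, not the suppressed patch's code, and the field layout is hypothetical -- the unwinder replaces frame-pointer chasing with a binary search over a sorted table of instruction addresses, each mapping to a small entry that says how to reach the previous frame:

#include <stddef.h>
#include <stdint.h>

/* Illustrative ORC-style entry: how to recover the caller's stack
 * pointer from the current register state. */
struct orc_entry {
    int16_t sp_offset; /* caller SP = base register + sp_offset */
    uint8_t sp_reg;    /* which register serves as the base */
};

/*
 * ip_table[i] holds the first text offset covered by orc_table[i];
 * both tables are sorted by address, so a lookup is a plain binary
 * search. Assumes n > 0 and ip >= ip_table[0].
 */
static const struct orc_entry *orc_find(const uint32_t *ip_table,
                                        const struct orc_entry *orc_table,
                                        size_t n, uint32_t ip)
{
    size_t lo = 0, hi = n;

    while (hi - lo > 1) {
        size_t mid = lo + (hi - lo) / 2;

        if (ip_table[mid] <= ip)
            lo = mid; /* the entry at mid still covers ip */
        else
            hi = mid;
    }
    return &orc_table[lo];
}

Because the tables are consulted instead of pushing frame pointers at runtime, the unwinder costs memory for the tables rather than text size and per-function setup overhead, which is the trade-off the next patch's Kconfig help text describes.
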
@ -0,0 +1,171 @@
From 73cf1dd35105d9cf270caf4a72b400b0a3ab4bb2 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 25 Jul 2017 08:54:24 -0500
Subject: [PATCH 036/231] x86/kconfig: Consolidate unwinders into multiple
choice selection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

There are three mutually exclusive unwinders. Make that more obvious by
combining them into a multiple-choice selection:

CONFIG_FRAME_POINTER_UNWINDER
CONFIG_ORC_UNWINDER
CONFIG_GUESS_UNWINDER (if CONFIG_EXPERT=y)

Frame pointers are still the default (for now).

The old CONFIG_FRAME_POINTER option is still used in some
arch-independent places, so keep it around, but make it
invisible to the user on x86 - it's now selected by
CONFIG_FRAME_POINTER_UNWINDER=y.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/20170725135424.zukjmgpz3plf5pmt@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 81d387190039c14edac8de2b3ec789beb899afd9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 26ddacc1e6333555e4a6bd63c4c935b323509f92)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/unwind.h | 4 ++--
arch/x86/Kconfig | 3 +--
arch/x86/Kconfig.debug | 45 +++++++++++++++++++++++++++++++++++++------
arch/x86/configs/tiny.config | 2 ++
4 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 25b8d31a007d..e9f793e2df7a 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -16,7 +16,7 @@ struct unwind_state {
bool signal, full_regs;
unsigned long sp, bp, ip;
struct pt_regs *regs;
-#elif defined(CONFIG_FRAME_POINTER)
+#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
struct pt_regs *regs;
@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
__unwind_start(state, task, regs, first_frame);
}

-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER)
+#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
if (unwind_done(state))
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d6f45f6d1054..3a0b8cb57caf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -73,7 +73,6 @@ config X86
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
- select ARCH_WANT_FRAME_POINTERS
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_THP_SWAP if X86_64
select BUILDTIME_EXTABLE_SORT
@@ -169,7 +168,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER && STACK_VALIDATION
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d5bca2ec8a74..c441b5d65ec8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -356,6 +356,29 @@ config PUNIT_ATOM_DEBUG
The current power state can be read from
/sys/kernel/debug/punit_atom/dev_power_state

+choice
+ prompt "Choose kernel unwinder"
+ default FRAME_POINTER_UNWINDER
+ ---help---
+ This determines which method will be used for unwinding kernel stack
+ traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
+ livepatch, lockdep, and more.
+
+config FRAME_POINTER_UNWINDER
+ bool "Frame pointer unwinder"
+ select FRAME_POINTER
+ ---help---
+ This option enables the frame pointer unwinder for unwinding kernel
+ stack traces.
+
+ The unwinder itself is fast and it uses less RAM than the ORC
+ unwinder, but the kernel text size will grow by ~3% and the kernel's
+ overall performance will degrade by roughly 5-10%.
+
+ This option is recommended if you want to use the livepatch
+ consistency model, as this is currently the only way to get a
+ reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
config ORC_UNWINDER
bool "ORC unwinder"
depends on X86_64
@@ -373,12 +396,22 @@ config ORC_UNWINDER
Enabling this option will increase the kernel's runtime memory usage
by roughly 2-4MB, depending on your kernel config.

-config FRAME_POINTER_UNWINDER
- def_bool y
- depends on !ORC_UNWINDER && FRAME_POINTER
-
config GUESS_UNWINDER
- def_bool y
- depends on !ORC_UNWINDER && !FRAME_POINTER
+ bool "Guess unwinder"
+ depends on EXPERT
+ ---help---
+ This option enables the "guess" unwinder for unwinding kernel stack
+ traces. It scans the stack and reports every kernel text address it
+ finds. Some of the addresses it reports may be incorrect.
+
+ While this option often produces false positives, it can still be
+ useful in many cases. Unlike the other unwinders, it has no runtime
+ overhead.
+
+endchoice
+
+config FRAME_POINTER
+ depends on !ORC_UNWINDER && !GUESS_UNWINDER
+ bool

endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 4b429df40d7a..550cd5012b73 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,3 +1,5 @@
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
+CONFIG_GUESS_UNWINDER=y
+# CONFIG_FRAME_POINTER_UNWINDER is not set
--
2.14.2

@ -0,0 +1,51 @@
From 2c9eb7028c0714c3379b58a59c60f7b5b7a5adb0 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 3 Oct 2017 20:10:36 -0500
Subject: [PATCH 037/231] objtool: Upgrade libelf-devel warning to error for
CONFIG_ORC_UNWINDER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

With CONFIG_ORC_UNWINDER, if the user doesn't have libelf-devel
installed, and they don't see the make warning, their ORC unwinder will
be silently broken. Upgrade the warning to an error.

Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/d9dfc39fb8240998820f9efb233d283a1ee96084.1507079417.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 3dd40cb320fee7c23b574ab821ce140ccd1281c9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit c413466a72ca533ec126ebc0c5bb579ae0c96b1d)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
Makefile | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8e14a926fc94..490ce18685ea 100644
--- a/Makefile
+++ b/Makefile
@@ -965,7 +965,11 @@ ifdef CONFIG_STACK_VALIDATION
ifeq ($(has_libelf),1)
objtool_target := tools/objtool FORCE
else
- $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ ifdef CONFIG_ORC_UNWINDER
+ $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ else
+ $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ endif
SKIP_STACK_VALIDATION := 1
export SKIP_STACK_VALIDATION
endif
--
2.14.2

@ -0,0 +1,82 @@
From 34aa933a9bce5fb9c88e6ed98b268cbf058e51eb Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 26 Jul 2017 07:16:30 -0700
Subject: [PATCH 038/231] x86/ldt/64: Refresh DS and ES when modify_ldt changes
an entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

On x86_32, modify_ldt() implicitly refreshes the cached DS and ES
segments because they are refreshed on return to usermode.

On x86_64, they're not refreshed on return to usermode. To improve
determinism and match x86_32's behavior, refresh them when we update
the LDT.

This avoids a situation in which the DS points to a descriptor that is
changed but the old cached segment persists until the next reschedule.
If this happens, then the user-visible state will change
nondeterministically some time after modify_ldt() returns, which is
unfortunate.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Chang Seok <chang.seok.bae@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit a632375764aa25c97b78beb56c71b0ba59d1cf83)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 295cb0b06150958ec84ee4b8844ef7e389e22c4e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/ldt.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index a870910c8565..f0e64db18ac8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -21,6 +21,25 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>

+static void refresh_ldt_segments(void)
+{
+#ifdef CONFIG_X86_64
+ unsigned short sel;
+
+ /*
+ * Make sure that the cached DS and ES descriptors match the updated
+ * LDT.
+ */
+ savesegment(ds, sel);
+ if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
+ loadsegment(ds, sel);
+
+ savesegment(es, sel);
+ if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
+ loadsegment(es, sel);
+#endif
+}
+
/* context.lock is held for us, so we don't need any locking. */
static void flush_ldt(void *__mm)
{
@@ -32,6 +51,8 @@ static void flush_ldt(void *__mm)

pc = &mm->context;
set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+
+ refresh_ldt_segments();
}

/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
--
2.14.2

@ -0,0 +1,182 @@
From 09fedd9befc7affbfa9490ef3993d60c7d582a6f Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:15 -0700
Subject: [PATCH 039/231] x86/mm: Give each mm TLB flush generation a unique ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This adds two new variables to mmu_context_t: ctx_id and tlb_gen.
ctx_id uniquely identifies the mm_struct and will never be reused.
For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic
count of the number of times that a TLB flush has been requested.
The pair (ctx_id, tlb_gen) can be used as an identifier for TLB
flush actions and will be used in subsequent patches to reliably
determine whether all needed TLB flushes have occurred on a given
CPU.

This patch is split out for ease of review. By itself, it has no
real effect other than creating and updating the new variables.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit f39681ed0f48498b80455095376f11535feea332)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit e566a0dfbb2a5f7ea90dd66ce384740372739e14)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu.h | 25 +++++++++++++++++++++++--
arch/x86/include/asm/mmu_context.h | 6 ++++++
arch/x86/include/asm/tlbflush.h | 18 ++++++++++++++++++
arch/x86/mm/tlb.c | 6 ++++--
4 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 79b647a7ebd0..bb8c597c2248 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,12 +3,28 @@

#include <linux/spinlock.h>
#include <linux/mutex.h>
+#include <linux/atomic.h>

/*
- * The x86 doesn't have a mmu context, but
- * we put the segment information here.
+ * x86 has arch-specific MMU state beyond what lives in mm_struct.
*/
typedef struct {
+ /*
+ * ctx_id uniquely identifies this mm_struct. A ctx_id will never
+ * be reused, and zero is not a valid ctx_id.
+ */
+ u64 ctx_id;
+
+ /*
+ * Any code that needs to do any sort of TLB flushing for this
+ * mm will first make its changes to the page tables, then
+ * increment tlb_gen, then flush. This lets the low-level
+ * flushing code keep track of what needs flushing.
+ *
+ * This is not used on Xen PV.
+ */
+ atomic64_t tlb_gen;
+
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
#endif
@@ -37,6 +53,11 @@ typedef struct {
#endif
} mm_context_t;

+#define INIT_MM_CONTEXT(mm) \
+ .context = { \
+ .ctx_id = 1, \
+ }
+
void leave_mm(int cpu);

#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7a234be7e298..6c05679c715b 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,6 +12,9 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
+
+extern atomic64_t last_mm_ctx_id;
+
#ifndef CONFIG_PARAVIRT
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
@@ -132,6 +135,9 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+ atomic64_set(&mm->context.tlb_gen, 0);
+
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
/* pkey 0 is the default and always allocated */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 2b3d68093235..f1f2e73b7b77 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -57,6 +57,23 @@ static inline void invpcid_flush_all_nonglobals(void)
__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
}

+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+ u64 new_tlb_gen;
+
+ /*
+ * Bump the generation count. This also serves as a full barrier
+ * that synchronizes with switch_mm(): callers are required to order
+ * their read of mm_cpumask after their writes to the paging
+ * structures.
+ */
+ smp_mb__before_atomic();
+ new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
+ smp_mb__after_atomic();
+
+ return new_tlb_gen;
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -270,6 +287,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
{
+ inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 014d07a80053..14f4f8f66aa8 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,8 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/

+atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+
void leave_mm(int cpu)
{
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -250,8 +252,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,

cpu = get_cpu();

- /* Synchronize with switch_mm. */
- smp_mb();
+ /* This is also a barrier that synchronizes with switch_mm(). */
+ inc_mm_tlb_gen(mm);

/* Should we flush just the requested range? */
if ((end != TLB_FLUSH_ALL) &&
--
2.14.2

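The (ctx_id, tlb_gen) pair above is consumed as a staleness check. A minimal standalone sketch of that check (illustrative C with made-up names, not the kernel's code; the real per-CPU side of this bookkeeping lands in the next patch):

#include <stdbool.h>
#include <stdint.h>

/* What one CPU would remember about the entries sitting in its TLB. */
struct tlb_snapshot {
    uint64_t ctx_id;  /* which mm the cached translations came from */
    uint64_t tlb_gen; /* the flush generation this CPU has caught up to */
};

/*
 * The TLB is current iff it caches the same mm (ctx_ids are never
 * reused, so a match is conclusive) and no flush has been requested
 * since this CPU last flushed.
 */
static bool tlb_is_current(const struct tlb_snapshot *cpu,
                           uint64_t mm_ctx_id, uint64_t mm_tlb_gen)
{
    return cpu->ctx_id == mm_ctx_id && cpu->tlb_gen == mm_tlb_gen;
}

Because tlb_gen only ever increases, a CPU that compares unequal also knows how far behind it is, which is what lets the next patch choose between partial and full flushes.
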
@ -0,0 +1,279 @@
From c1f19d153ad69363ac1bc62bbd9be05ca48c526c Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:16 -0700
Subject: [PATCH 040/231] x86/mm: Track the TLB's tlb_gen and update the
flushing algorithm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

There are two kernel features that would benefit from tracking
how up-to-date each CPU's TLB is in the case where IPIs aren't keeping
it up to date in real time:

- Lazy mm switching currently works by switching to init_mm when
it would otherwise flush. This is wasteful: there isn't fundamentally
any need to update CR3 at all when going lazy or when returning from
lazy mode, nor is there any need to receive flush IPIs at all. Instead,
we should just stop trying to keep the TLB coherent when we go lazy and,
when unlazying, check whether we missed any flushes.

- PCID will let us keep recent user contexts alive in the TLB. If we
start doing this, we need a way to decide whether those contexts are
up to date.

On some paravirt systems, remote TLBs can be flushed without IPIs.
This won't update the target CPUs' tlb_gens, which may cause
unnecessary local flushes later on. We can address this if it becomes
a problem by carefully updating the target CPU's tlb_gen directly.

By itself, this patch is a very minor optimization that avoids
unnecessary flushes when multiple TLB flushes targeting the same CPU
race. The complexity in this patch would not be worth it on its own,
but it will enable improved lazy TLB tracking and PCID.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1210fb244bc9cbe7677f7f0b72db4d359675f24b.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit b0579ade7cd82391360e959cc844e50a160e8a96)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d34881c25f3c70228ed792fd62881185a25c4422)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/tlbflush.h | 43 +++++++++++++++--
arch/x86/mm/tlb.c | 102 +++++++++++++++++++++++++++++++++++++---
2 files changed, 135 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index f1f2e73b7b77..3a167c214560 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -82,6 +82,11 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif

+struct tlb_context {
+ u64 ctx_id;
+ u64 tlb_gen;
+};
+
struct tlb_state {
/*
* cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
@@ -97,6 +102,21 @@ struct tlb_state {
* disabling interrupts when modifying either one.
*/
unsigned long cr4;
+
+ /*
+ * This is a list of all contexts that might exist in the TLB.
+ * Since we don't yet use PCID, there is only one context.
+ *
+ * For each context, ctx_id indicates which mm the TLB's user
+ * entries came from. As an invariant, the TLB will never
+ * contain entries that are out-of-date as of when that mm reached
+ * the tlb_gen in the list.
+ *
+ * To be clear, this means that it's legal for the TLB code to
+ * flush the TLB without updating tlb_gen. This can happen
+ * (for now, at least) due to paravirt remote flushes.
+ */
+ struct tlb_context ctxs[1];
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

@@ -256,9 +276,26 @@ static inline void __flush_tlb_one(unsigned long addr)
* and page-granular flushes are available only on i486 and up.
*/
struct flush_tlb_info {
- struct mm_struct *mm;
- unsigned long start;
- unsigned long end;
+ /*
+ * We support several kinds of flushes.
+ *
+ * - Fully flush a single mm. .mm will be set, .end will be
+ * TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
+ * which the IPI sender is trying to catch us up.
+ *
+ * - Partially flush a single mm. .mm will be set, .start and
+ * .end will indicate the range, and .new_tlb_gen will be set
+ * such that the changes between generation .new_tlb_gen-1 and
+ * .new_tlb_gen are entirely contained in the indicated range.
+ *
+ * - Fully flush all mms whose tlb_gens have been updated. .mm
+ * will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
+ * will be zero.
+ */
+ struct mm_struct *mm;
+ unsigned long start;
+ unsigned long end;
+ u64 new_tlb_gen;
};

#define local_flush_tlb() __flush_tlb()
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 14f4f8f66aa8..4e5a5ddb9e4d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -105,6 +105,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
}

this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));

WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
cpumask_set_cpu(cpu, mm_cpumask(next));
@@ -155,25 +157,102 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_ldt(real_prev, next);
}

+/*
+ * flush_tlb_func_common()'s memory ordering requirement is that any
+ * TLB fills that happen after we flush the TLB are ordered after we
+ * read active_mm's tlb_gen. We don't need any explicit barriers
+ * because all x86 flush operations are serializing and the
+ * atomic64_read operation won't be reordered by the compiler.
+ */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
bool local, enum tlb_flush_reason reason)
{
+ /*
+ * We have three different tlb_gen values in here. They are:
+ *
+ * - mm_tlb_gen: the latest generation.
+ * - local_tlb_gen: the generation that this CPU has already caught
+ * up to.
+ * - f->new_tlb_gen: the generation that the requester of the flush
+ * wants us to catch up to.
+ */
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+ u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
+
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());

+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
+ loaded_mm->context.ctx_id);
+
if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+ /*
+ * leave_mm() is adequate to handle any type of flush, and
+ * we would prefer not to receive further IPIs. leave_mm()
+ * clears this CPU's bit in mm_cpumask().
+ */
leave_mm(smp_processor_id());
return;
}

- if (f->end == TLB_FLUSH_ALL) {
- local_flush_tlb();
- if (local)
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- trace_tlb_flush(reason, TLB_FLUSH_ALL);
- } else {
+ if (unlikely(local_tlb_gen == mm_tlb_gen)) {
+ /*
+ * There's nothing to do: we're already up to date. This can
+ * happen if two concurrent flushes happen -- the first flush to
+ * be handled can catch us all the way up, leaving no work for
+ * the second flush.
+ */
+ return;
+ }
+
+ WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
+ WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
+
+ /*
+ * If we get to this point, we know that our TLB is out of date.
+ * This does not strictly imply that we need to flush (it's
+ * possible that f->new_tlb_gen <= local_tlb_gen), but we're
+ * going to need to flush in the very near future, so we might
+ * as well get it over with.
+ *
+ * The only question is whether to do a full or partial flush.
+ *
+ * We do a partial flush if requested and two extra conditions
+ * are met:
+ *
+ * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
+ * we've always done all needed flushes to catch up to
+ * local_tlb_gen. If, for example, local_tlb_gen == 2 and
+ * f->new_tlb_gen == 3, then we know that the flush needed to bring
+ * us up to date for tlb_gen 3 is the partial flush we're
+ * processing.
+ *
+ * As an example of why this check is needed, suppose that there
+ * are two concurrent flushes. The first is a full flush that
+ * changes context.tlb_gen from 1 to 2. The second is a partial
+ * flush that changes context.tlb_gen from 2 to 3. If they get
+ * processed on this CPU in reverse order, we'll see
+ * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
+ * If we were to use __flush_tlb_single() and set local_tlb_gen to
+ * 3, we'd break the invariant: we'd update local_tlb_gen above
+ * 1 without the full flush that's needed for tlb_gen 2.
+ *
+ * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization.
+ * Partial TLB flushes are not all that much cheaper than full TLB
+ * flushes, so it seems unlikely that it would be a performance win
+ * to do a partial flush if that won't bring our TLB fully up to
+ * date. By doing a full flush instead, we can increase
+ * local_tlb_gen all the way to mm_tlb_gen and we can probably
+ * avoid another flush in the very near future.
+ */
+ if (f->end != TLB_FLUSH_ALL &&
+ f->new_tlb_gen == local_tlb_gen + 1 &&
+ f->new_tlb_gen == mm_tlb_gen) {
+ /* Partial flush */
unsigned long addr;
unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+
addr = f->start;
while (addr < f->end) {
__flush_tlb_single(addr);
@@ -182,7 +261,16 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
if (local)
count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
trace_tlb_flush(reason, nr_pages);
+ } else {
+ /* Full flush. */
+ local_flush_tlb();
+ if (local)
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ trace_tlb_flush(reason, TLB_FLUSH_ALL);
}
+
+ /* Both paths above update our state to mm_tlb_gen. */
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
}

static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
@@ -253,7 +341,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
cpu = get_cpu();

/* This is also a barrier that synchronizes with switch_mm(). */
- inc_mm_tlb_gen(mm);
+ info.new_tlb_gen = inc_mm_tlb_gen(mm);

/* Should we flush just the requested range? */
if ((end != TLB_FLUSH_ALL) &&
--
2.14.2

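The long comment in flush_tlb_func_common() above boils down to a three-way decision on its generation inputs. A standalone distillation (illustrative C, not the kernel's code):

#include <stdint.h>

enum flush_kind { FLUSH_NONE, FLUSH_PARTIAL, FLUSH_FULL };

/*
 * local_gen: this CPU's ctxs[0].tlb_gen; mm_gen: the mm's latest
 * tlb_gen; new_gen: the requester's f->new_tlb_gen; range_flush:
 * nonzero when f->end != TLB_FLUSH_ALL.
 */
static enum flush_kind decide_flush(uint64_t local_gen, uint64_t mm_gen,
                                    uint64_t new_gen, int range_flush)
{
    if (local_gen == mm_gen)
        return FLUSH_NONE;           /* a racing flush already caught us up */

    if (range_flush &&
        new_gen == local_gen + 1 &&  /* exactly the next generation... */
        new_gen == mm_gen)           /* ...and it brings us fully current */
        return FLUSH_PARTIAL;

    return FLUSH_FULL;               /* the safe catch-all */
}

For the reverse-order race in the comment (local_gen == 1, mm_gen == 3, a partial request with new_gen == 3), decide_flush() returns FLUSH_FULL, preserving the invariant that local_tlb_gen never advances past a generation whose flush was skipped.
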
@ -0,0 +1,454 @@
From caa3549fe709971498eaf080c1710ef627a0df5a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:17 -0700
Subject: [PATCH 041/231] x86/mm: Rework lazy TLB mode and TLB freshness
tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

x86's lazy TLB mode used to be fairly weak -- it would switch to
init_mm the first time it tried to flush a lazy TLB. This meant an
unnecessary CR3 write and, if the flush was remote, an unnecessary
IPI.

Rewrite it entirely. When we enter lazy mode, we simply remove the
CPU from mm_cpumask. This means that we need a way to figure out
whether we've missed a flush when we switch back out of lazy mode.
I use the tlb_gen machinery to track whether a context is up to
date.

Note to reviewers: this patch, by itself, looks a bit odd. I'm
using an array of length 1 containing (ctx_id, tlb_gen) rather than
just storing tlb_gen, and making it an array isn't necessary yet.
I'm doing this because the next few patches add PCID support, and,
with PCID, we need ctx_id, and the array will end up with a length
greater than 1. Making it an array now means that there will be
less churn and therefore less stress on your eyeballs.

NB: This is dubious but, AFAICT, still correct on Xen and UV.
xen_exit_mmap() uses mm_cpumask() for nefarious purposes and this
patch changes the way that mm_cpumask() works. This should be okay,
since Xen *also* iterates all online CPUs to find all the CPUs it
needs to twiddle.

The UV tlbflush code is rather dated and should be changed.

Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
(turbo off, intel_pstate requesting max performance) under KVM with
the guest using idle=poll (to avoid artifacts when bouncing between
CPUs). I haven't done any real statistics here -- I just ran them
in a loop and picked the fastest results that didn't look like
outliers. Unpatched means commit a4eb8b993554, so all the
bookkeeping overhead is gone.

MADV_DONTNEED; touch the page; switch CPUs using sched_setaffinity. In
an unpatched kernel, MADV_DONTNEED will send an IPI to the previous CPU.
This is intended to be a nearly worst-case test.

patched: 13.4µs
unpatched: 21.6µs

Vitaly's pthread_mmap microbenchmark with 8 threads (on four cores),
nrounds = 100, 256M data

patched: 1.1 seconds or so
unpatched: 1.9 seconds or so

The speedup on Vitaly's test appears to be because it spends a lot
of time blocked on mmap_sem, and this patch avoids sending IPIs to
blocked CPUs.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Banman <abanman@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Travis <travis@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/ddf2c92962339f4ba39d8fc41b853936ec0b44f1.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 94b1b03b519b81c494900cb112aa00ed205cc2d9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b381b7ae452f2bc6384507a897247be7c93a71cc)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 6 +-
arch/x86/include/asm/tlbflush.h | 4 -
arch/x86/mm/init.c | 1 -
arch/x86/mm/tlb.c | 197 ++++++++++++++++++++++---------------
arch/x86/xen/mmu_pv.c | 5 +-
5 files changed, 124 insertions(+), 89 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6c05679c715b..d6b055b328f2 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -128,8 +128,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)

static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+ int cpu = smp_processor_id();
+
+ if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+ cpumask_clear_cpu(cpu, mm_cpumask(mm));
}

static inline int init_new_context(struct task_struct *tsk,
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 3a167c214560..6397275008db 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -95,7 +95,6 @@ struct tlb_state {
* mode even if we've already switched back to swapper_pg_dir.
*/
struct mm_struct *loaded_mm;
- int state;

/*
* Access to this CR4 shadow and to H/W CR4 is protected by
@@ -318,9 +317,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
void native_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info);

-#define TLBSTATE_OK 1
-#define TLBSTATE_LAZY 2
-
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
{
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index df2624b091a7..c86dc071bb10 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -849,7 +849,6 @@ void __init zone_sizes_init(void)

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.loaded_mm = &init_mm,
- .state = 0,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4e5a5ddb9e4d..0982c997d36f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -45,8 +45,8 @@ void leave_mm(int cpu)
if (loaded_mm == &init_mm)
return;

- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
- BUG();
+ /* Warn if we're not lazy. */
+ WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));

switch_mm(NULL, &init_mm, NULL);
}
@@ -65,94 +65,117 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
- unsigned cpu = smp_processor_id();
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+ unsigned cpu = smp_processor_id();
+ u64 next_tlb_gen;

/*
- * NB: The scheduler will call us with prev == next when
- * switching from lazy TLB mode to normal mode if active_mm
- * isn't changing. When this happens, there is no guarantee
- * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
+ * NB: The scheduler will call us with prev == next when switching
+ * from lazy TLB mode to normal mode if active_mm isn't changing.
+ * When this happens, we don't assume that CR3 (and hence
+ * cpu_tlbstate.loaded_mm) matches next.
*
* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
*/

- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ /* We don't want flush_tlb_func_* to run concurrently with us. */
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING))
+ WARN_ON_ONCE(!irqs_disabled());
+
+ /*
+ * Verify that CR3 is what we think it is. This will catch
+ * hypothetical buggy code that directly switches to swapper_pg_dir
+ * without going through leave_mm() / switch_mm_irqs_off().
+ */
+ VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));

if (real_prev == next) {
- /*
- * There's nothing to do: we always keep the per-mm control
- * regs in sync with cpu_tlbstate.loaded_mm. Just
- * sanity-check mm_cpumask.
- */
- if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
- cpumask_set_cpu(cpu, mm_cpumask(next));
- return;
- }
+ VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
+ next->context.ctx_id);
+
+ if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ /*
+ * There's nothing to do: we weren't lazy, and we
+ * aren't changing our mm. We don't need to flush
+ * anything, nor do we need to update CR3, CR4, or
+ * LDTR.
+ */
+ return;
+ }
+
+ /* Resume remote flushes and then read tlb_gen. */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+ if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
+ /*
+ * Ideally, we'd have a flush_tlb() variant that
+ * takes the known CR3 value as input. This would
+ * be faster on Xen PV and on hypothetical CPUs
+ * on which INVPCID is fast.
+ */
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
+ next_tlb_gen);
+ write_cr3(__pa(next->pgd));
+
+ /*
+ * This gets called via leave_mm() in the idle path
+ * where RCU functions differently. Tracing normally
+ * uses RCU, so we have to call the tracepoint
+ * specially here.
+ */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_FLUSH_ALL);
+ }

- if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
- * If our current stack is in vmalloc space and isn't
- * mapped in the new pgd, we'll double-fault. Forcibly
- * map it.
+ * We just exited lazy mode, which means that CR4 and/or LDTR
+ * may be stale. (Changes to the required CR4 and LDTR states
+ * are not reflected in tlb_gen.)
*/
- unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
-
- pgd_t *pgd = next->pgd + stack_pgd_index;
-
- if (unlikely(pgd_none(*pgd)))
- set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
- }
+ } else {
+ VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
+ next->context.ctx_id);
+
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ /*
+ * If our current stack is in vmalloc space and isn't
+ * mapped in the new pgd, we'll double-fault. Forcibly
+ * map it.
+ */
+ unsigned int index = pgd_index(current_stack_pointer());
+ pgd_t *pgd = next->pgd + index;
+
+ if (unlikely(pgd_none(*pgd)))
+ set_pgd(pgd, init_mm.pgd[index]);
+ }

- this_cpu_write(cpu_tlbstate.loaded_mm, next);
- this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));
+ /* Stop remote flushes for the previous mm */
+ if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
+ cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

- WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
- cpumask_set_cpu(cpu, mm_cpumask(next));
+ VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));

- /*
- * Re-load page tables.
- *
- * This logic has an ordering constraint:
- *
- * CPU 0: Write to a PTE for 'next'
- * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
- * CPU 1: set bit 1 in next's mm_cpumask
- * CPU 1: load from the PTE that CPU 0 writes (implicit)
- *
- * We need to prevent an outcome in which CPU 1 observes
- * the new PTE value and CPU 0 observes bit 1 clear in
- * mm_cpumask. (If that occurs, then the IPI will never
- * be sent, and CPU 0's TLB will contain a stale entry.)
- *
- * The bad outcome can occur if either CPU's load is
- * reordered before that CPU's store, so both CPUs must
- * execute full barriers to prevent this from happening.
- *
- * Thus, switch_mm needs a full barrier between the
- * store to mm_cpumask and any operation that could load
- * from next->pgd. TLB fills are special and can happen
- * due to instruction fetches or for no reason at all,
- * and neither LOCK nor MFENCE orders them.
- * Fortunately, load_cr3() is serializing and gives the
- * ordering guarantee we need.
- */
- load_cr3(next->pgd);
+ /*
+ * Start remote flushes and then read tlb_gen.
+ */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);

- /*
- * This gets called via leave_mm() in the idle path where RCU
- * functions differently. Tracing normally uses RCU, so we have to
- * call the tracepoint specially here.
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
+ this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ write_cr3(__pa(next->pgd));

- /* Stop flush ipis for the previous mm */
- WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
- real_prev != &init_mm);
- cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+ /*
+ * This gets called via leave_mm() in the idle path where RCU
+ * functions differently. Tracing normally uses RCU, so we
+ * have to call the tracepoint specially here.
+ */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_FLUSH_ALL);
+ }

- /* Load per-mm CR4 and LDTR state */
load_mm_cr4(next);
switch_ldt(real_prev, next);
}
@@ -186,13 +209,13 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
loaded_mm->context.ctx_id);

- if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
/*
- * leave_mm() is adequate to handle any type of flush, and
- * we would prefer not to receive further IPIs. leave_mm()
- * clears this CPU's bit in mm_cpumask().
+ * We're in lazy mode -- don't flush. We can get here on
+ * remote flushes due to races and on local flushes if a
+ * kernel thread coincidentally flushes the mm it's lazily
+ * still using.
*/
- leave_mm(smp_processor_id());
return;
}

@@ -203,6 +226,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
* be handled can catch us all the way up, leaving no work for
* the second flush.
*/
+ trace_tlb_flush(reason, 0);
return;
}

@@ -304,6 +328,21 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
(info->end - info->start) >> PAGE_SHIFT);

if (is_uv_system()) {
+ /*
+ * This whole special case is confused. UV has a "Broadcast
+ * Assist Unit", which seems to be a fancy way to send IPIs.
+ * Back when x86 used an explicit TLB flush IPI, UV was
+ * optimized to use its own mechanism. These days, x86 uses
+ * smp_call_function_many(), but UV still uses a manual IPI,
+ * and that IPI's action is out of date -- it does a manual
+ * flush instead of calling flush_tlb_func_remote(). This
+ * means that the percpu tlb_gen variables won't be updated
+ * and we'll do pointless flushes on future context switches.
+ *
+ * Rather than hooking native_flush_tlb_others() here, I think
+ * that UV should be updated so that smp_call_function_many(),
+ * etc, are optimal on UV.
+ */
unsigned int cpu;

cpu = smp_processor_id();
@@ -363,6 +402,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,

if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), &info);
+
put_cpu();
}

@@ -371,8 +411,6 @@ static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
__flush_tlb_all();
- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
- leave_mm(smp_processor_id());
}

void flush_tlb_all(void)
@@ -425,6 +463,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)

if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
flush_tlb_others(&batch->cpumask, &info);
+
cpumask_clear(&batch->cpumask);

put_cpu();
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 5f61b7e2e6b2..ba76f3ce997f 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1005,14 +1005,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
/* Get the "official" set of cpus referring to our pagetable. */
if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
- && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+ if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
continue;
smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
}
return;
}
- cpumask_copy(mask, mm_cpumask(mm));

/*
* It's possible that a vcpu may have a stale reference to our
@@ -1021,6 +1019,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
* look at its actual current cr3 value, and force it to flush
* if needed.
*/
+ cpumask_clear(mask);
for_each_online_cpu(cpu) {
if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
cpumask_set_cpu(cpu, mask);
--
2.14.2

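The new lazy-TLB protocol described above fits in a few rules. A minimal model of those rules (illustrative C; the kernel spreads this across enter_lazy_tlb(), switch_mm_irqs_off() and flush_tlb_func_common()):

#include <stdbool.h>
#include <stdint.h>

struct lazy_cpu {
    uint64_t tlb_gen;   /* mirror of this CPU's ctxs[0].tlb_gen */
    bool in_mm_cpumask; /* this CPU's bit in mm_cpumask(loaded mm) */
};

/* Going lazy: opt out of flush IPIs; the TLB is left as-is. */
static void enter_lazy(struct lazy_cpu *cpu)
{
    cpu->in_mm_cpumask = false;
}

/* A flush request that finds us lazy does nothing: we opted out. */
static bool should_flush_now(const struct lazy_cpu *cpu)
{
    return cpu->in_mm_cpumask;
}

/* Unlazying: opt back in first, then flush iff we missed a generation. */
static bool exit_lazy_needs_flush(struct lazy_cpu *cpu, uint64_t mm_tlb_gen)
{
    cpu->in_mm_cpumask = true;
    return cpu->tlb_gen < mm_tlb_gen;
}

The ordering in exit_lazy_needs_flush() mirrors the "Resume remote flushes and then read tlb_gen" comment above: setting the cpumask bit before reading tlb_gen ensures any flush requested after the read will be delivered by IPI instead of being missed.
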
@ -0,0 +1,340 @@
From e3c7bff633fc1210c6b19dd3ebcafb9f6716d586 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 24 Jul 2017 21:41:38 -0700
Subject: [PATCH 042/231] x86/mm: Implement PCID based optimization: try to
preserve old TLB entries using PCID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

PCID is a "process context ID" -- it's what other architectures call
an address space ID. Every non-global TLB entry is tagged with a
PCID, only TLB entries that match the currently selected PCID are
used, and we can switch PGDs without flushing the TLB. x86's
PCID is 12 bits.

This is an unorthodox approach to using PCID. x86's PCID is far too
short to uniquely identify a process, and we can't even really
uniquely identify a running process because there are monster
systems with over 4096 CPUs. To make matters worse, past attempts
to use all 12 PCID bits have resulted in slowdowns instead of
speedups.

This patch uses PCID differently. We use a PCID to identify a
recently-used mm on a per-cpu basis. An mm has no fixed PCID
binding at all; instead, we give it a fresh PCID each time it's
loaded except in cases where we want to preserve the TLB, in which
case we reuse a recent value.

Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
(turbo off, intel_pstate requesting max performance) under KVM with
the guest using idle=poll (to avoid artifacts when bouncing between
CPUs). I haven't done any real statistics here -- I just ran them
in a loop and picked the fastest results that didn't look like
outliers. Unpatched means commit a4eb8b993554, so all the
bookkeeping overhead is gone.

ping-pong between two mms on the same CPU using eventfd:

patched: 1.22µs
patched, nopcid: 1.33µs
unpatched: 1.34µs

Same ping-pong, but now touch 512 pages (all zero-page to minimize
cache misses) each iteration. dTLB misses are measured by
dtlb_load_misses.miss_causes_a_walk:

patched: 1.8µs 11M dTLB misses
patched, nopcid: 6.2µs, 207M dTLB misses
unpatched: 6.1µs, 190M dTLB misses

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/9ee75f17a81770feed616358e6860d98a2a5b1e7.1500957502.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 10af6235e0d327d42e1bad974385197817923dc1)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d833a976288cdcf7fb1dabb48ebf614ebf6a311c)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 3 ++
arch/x86/include/asm/processor-flags.h | 2 +
arch/x86/include/asm/tlbflush.h | 18 +++++++-
arch/x86/mm/init.c | 1 +
arch/x86/mm/tlb.c | 84 +++++++++++++++++++++++++---------
5 files changed, 85 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index d6b055b328f2..7ae318c340d9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -298,6 +298,9 @@ static inline unsigned long __get_current_cr3_fast(void)
{
unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);

+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());

diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 79aa2f98398d..791b60199aa4 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -35,6 +35,7 @@
/* Mask off the address space ID bits. */
#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
#define CR3_PCID_MASK 0xFFFull
+#define CR3_NOFLUSH (1UL << 63)
#else
/*
* CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
@@ -42,6 +43,7 @@
*/
#define CR3_ADDR_MASK 0xFFFFFFFFull
#define CR3_PCID_MASK 0ull
+#define CR3_NOFLUSH 0
#endif

#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6397275008db..d23e61dc0640 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -82,6 +82,12 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif

+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in
+ * two cache lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
+
struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@@ -95,6 +101,8 @@ struct tlb_state {
* mode even if we've already switched back to swapper_pg_dir.
*/
struct mm_struct *loaded_mm;
+ u16 loaded_mm_asid;
+ u16 next_asid;

/*
* Access to this CR4 shadow and to H/W CR4 is protected by
@@ -104,7 +112,8 @@ struct tlb_state {

/*
* This is a list of all contexts that might exist in the TLB.
- * Since we don't yet use PCID, there is only one context.
+ * There is one per ASID that we use, and the ASID (what the
+ * CPU calls PCID) is the index into ctxs.
*
* For each context, ctx_id indicates which mm the TLB's user
* entries came from. As an invariant, the TLB will never
@@ -114,8 +123,13 @@ struct tlb_state {
* To be clear, this means that it's legal for the TLB code to
* flush the TLB without updating tlb_gen. This can happen
* (for now, at least) due to paravirt remote flushes.
+ *
+ * NB: context 0 is a bit special, since it's also used by
+ * various bits of init code. This is fine -- code that
+ * isn't aware of PCID will end up harmlessly flushing
+ * context 0.
*/
- struct tlb_context ctxs[1];
+ struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index c86dc071bb10..af5c1ed21d43 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -849,6 +849,7 @@ void __init zone_sizes_init(void)

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.loaded_mm = &init_mm,
+ .next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0982c997d36f..57943b4d8f2e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -30,6 +30,40 @@

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

+static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
+ u16 *new_asid, bool *need_flush)
|
||||
+{
|
||||
+ u16 asid;
|
||||
+
|
||||
+ if (!static_cpu_has(X86_FEATURE_PCID)) {
|
||||
+ *new_asid = 0;
|
||||
+ *need_flush = true;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
|
||||
+ if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
|
||||
+ next->context.ctx_id)
|
||||
+ continue;
|
||||
+
|
||||
+ *new_asid = asid;
|
||||
+ *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
|
||||
+ next_tlb_gen);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * We don't currently own an ASID slot on this CPU.
|
||||
+ * Allocate a slot.
|
||||
+ */
|
||||
+ *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
|
||||
+ if (*new_asid >= TLB_NR_DYN_ASIDS) {
|
||||
+ *new_asid = 0;
|
||||
+ this_cpu_write(cpu_tlbstate.next_asid, 1);
|
||||
+ }
|
||||
+ *need_flush = true;
|
||||
+}
|
||||
+
|
||||
void leave_mm(int cpu)
|
||||
{
|
||||
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
|
||||
@@ -66,6 +100,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
|
||||
+ u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
|
||||
unsigned cpu = smp_processor_id();
|
||||
u64 next_tlb_gen;
|
||||
|
||||
@@ -85,12 +120,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
/*
|
||||
* Verify that CR3 is what we think it is. This will catch
|
||||
* hypothetical buggy code that directly switches to swapper_pg_dir
|
||||
- * without going through leave_mm() / switch_mm_irqs_off().
|
||||
+ * without going through leave_mm() / switch_mm_irqs_off() or that
|
||||
+ * does something like write_cr3(read_cr3_pa()).
|
||||
*/
|
||||
- VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));
|
||||
+ VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
|
||||
|
||||
if (real_prev == next) {
|
||||
- VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
|
||||
+ VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
|
||||
next->context.ctx_id);
|
||||
|
||||
if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
|
||||
@@ -107,16 +143,17 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
cpumask_set_cpu(cpu, mm_cpumask(next));
|
||||
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
|
||||
|
||||
- if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
|
||||
+ if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
|
||||
+ next_tlb_gen) {
|
||||
/*
|
||||
* Ideally, we'd have a flush_tlb() variant that
|
||||
* takes the known CR3 value as input. This would
|
||||
* be faster on Xen PV and on hypothetical CPUs
|
||||
* on which INVPCID is fast.
|
||||
*/
|
||||
- this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
|
||||
+ this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
|
||||
next_tlb_gen);
|
||||
- write_cr3(__pa(next->pgd));
|
||||
+ write_cr3(__pa(next->pgd) | prev_asid);
|
||||
|
||||
/*
|
||||
* This gets called via leave_mm() in the idle path
|
||||
@@ -134,8 +171,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
* are not reflected in tlb_gen.)
|
||||
*/
|
||||
} else {
|
||||
- VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
|
||||
- next->context.ctx_id);
|
||||
+ u16 new_asid;
|
||||
+ bool need_flush;
|
||||
|
||||
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
|
||||
/*
|
||||
@@ -162,18 +199,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
cpumask_set_cpu(cpu, mm_cpumask(next));
|
||||
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
|
||||
|
||||
- this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
|
||||
- this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
|
||||
- this_cpu_write(cpu_tlbstate.loaded_mm, next);
|
||||
- write_cr3(__pa(next->pgd));
|
||||
+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
|
||||
|
||||
- /*
|
||||
- * This gets called via leave_mm() in the idle path where RCU
|
||||
- * functions differently. Tracing normally uses RCU, so we
|
||||
- * have to call the tracepoint specially here.
|
||||
- */
|
||||
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
|
||||
+ if (need_flush) {
|
||||
+ this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
|
||||
+ this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
|
||||
+ write_cr3(__pa(next->pgd) | new_asid);
|
||||
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
|
||||
TLB_FLUSH_ALL);
|
||||
+ } else {
|
||||
+ /* The new ASID is already up to date. */
|
||||
+ write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
|
||||
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
|
||||
+ }
|
||||
+
|
||||
+ this_cpu_write(cpu_tlbstate.loaded_mm, next);
|
||||
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
|
||||
}
|
||||
|
||||
load_mm_cr4(next);
|
||||
@@ -200,13 +241,14 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
|
||||
* wants us to catch up to.
|
||||
*/
|
||||
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
|
||||
+ u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
|
||||
u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
|
||||
- u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
|
||||
+ u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
|
||||
|
||||
/* This code cannot presently handle being reentered. */
|
||||
VM_WARN_ON(!irqs_disabled());
|
||||
|
||||
- VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
|
||||
+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
|
||||
loaded_mm->context.ctx_id);
|
||||
|
||||
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
|
||||
@@ -294,7 +336,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
|
||||
}
|
||||
|
||||
/* Both paths above update our state to mm_tlb_gen. */
|
||||
- this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
|
||||
+ this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
|
||||
}
|
||||
|
||||
static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
|
||||
--
|
||||
2.14.2
|
||||
|
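Note: the following is an illustrative user-space sketch of the dynamic per-CPU ASID recycling that the patch above introduces. The toy tlb_state, choose_slot() and every other name here are assumptions made for this example only, not kernel code; unlike the real choose_new_asid(), a slot hit is treated as never needing a flush, whereas the kernel additionally compares the per-slot tlb_gen.

/* Minimal model: six reusable ASID slots per CPU, round-robin steal. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_DYN_ASIDS 6			/* mirrors TLB_NR_DYN_ASIDS */

static struct { uint64_t ctx_id; } ctxs[NR_DYN_ASIDS];
static uint16_t next_asid = 1;		/* slot 0 is shared with init code */

static void choose_slot(uint64_t ctx_id, uint16_t *asid, bool *need_flush)
{
	for (uint16_t a = 0; a < NR_DYN_ASIDS; a++) {
		if (ctxs[a].ctx_id == ctx_id) {
			*asid = a;	/* still ours: TLB contents reusable */
			*need_flush = false;
			return;
		}
	}
	*asid = next_asid++;		/* steal the next slot round-robin */
	if (*asid >= NR_DYN_ASIDS) {
		*asid = 0;
		next_asid = 1;
	}
	ctxs[*asid].ctx_id = ctx_id;
	*need_flush = true;		/* slot held another mm: must flush */
}

int main(void)
{
	uint16_t asid;
	bool flush;

	for (uint64_t mm = 100; mm < 108; mm++) {
		choose_slot(mm, &asid, &flush);
		printf("mm %llu -> asid %u flush=%d\n",
		       (unsigned long long)mm, (unsigned)asid, (int)flush);
	}
	return 0;
}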
176	patches/kernel/0043-x86-mm-Factor-out-CR3-building-code.patch	Normal file
@ -0,0 +1,176 @@
From ddb5e7b381d37d0f8bca61f0b761ae5c3a2f5ee0 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:48 -0700
Subject: [PATCH 043/231] x86/mm: Factor out CR3-building code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Currently, the code that assembles a value to load into CR3 is
open-coded everywhere. Factor it out into helpers build_cr3() and
build_cr3_noflush().

This makes one semantic change: __get_current_cr3_fast() was wrong
on SME systems. No one noticed because the only caller is in the
VMX code, and there are no CPUs with both SME and VMX.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 15 ++++++---
arch/x86/mm/tlb.c | 68 +++++++++++++++++++++++++++++++++++---
2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..a999ba6b721f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}

+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+ return __sme_pa(mm->pgd) | asid;
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+ return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+}

/*
* This can be used from process context to figure out what the value of
@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
*/
static inline unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
- if (static_cpu_has(X86_FEATURE_PCID))
- cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid));

/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 57943b4d8f2e..440400316c8a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* without going through leave_mm() / switch_mm_irqs_off() or that
* does something like write_cr3(read_cr3_pa()).
*/
- VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+ /*
+ * If we were to BUG here, we'd be very likely to kill
+ * the system so hard that we don't see the call trace.
+ * Try to recover instead by ignoring the error and doing
+ * a global flush to minimize the chance of corruption.
+ *
+ * (This is far from being a fully correct recovery.
+ * Architecturally, the CPU could prefetch something
+ * back into an incorrect ASID slot and leave it there
+ * to cause trouble down the road. It's better than
+ * nothing, though.)
+ */
+ __flush_tlb_all();
+ }
+#endif

if (real_prev == next) {
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
@@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
next_tlb_gen);
- write_cr3(__pa(next->pgd) | prev_asid);
+ write_cr3(build_cr3(next, prev_asid));

/*
* This gets called via leave_mm() in the idle path
@@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(__pa(next->pgd) | new_asid);
+ write_cr3(build_cr3(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+ write_cr3(build_cr3_noflush(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}

@@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_ldt(real_prev, next);
}

+/*
+ * Call this when reinitializing a CPU. It fixes the following potential
+ * problems:
+ *
+ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
+ * because the CPU was taken down and came back up with CR3's PCID
+ * bits clear. CPU hotplug can do this.
+ *
+ * - The TLB contains junk in slots corresponding to inactive ASIDs.
+ *
+ * - The CPU went so far out to lunch that it may have missed a TLB
+ * flush.
+ */
+void initialize_tlbstate_and_flush(void)
+{
+ int i;
+ struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
+ unsigned long cr3 = __read_cr3();
+
+ /* Assert that CR3 already references the right mm. */
+ WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
+
+ /*
+ * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
+ * doesn't work like other CR4 bits because it can only be set from
+ * long mode.)
+ */
+ WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
+ !(cr4_read_shadow() & X86_CR4_PCIDE));
+
+ /* Force ASID 0 and force a TLB flush. */
+ write_cr3(build_cr3(mm, 0));
+
+ /* Reinitialize tlbstate. */
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+ this_cpu_write(cpu_tlbstate.next_asid, 1);
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+
+ for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
+ this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
+}
+
/*
* flush_tlb_func_common()'s memory ordering requirement is that any
* TLB fills that happen after we flush the TLB are ordered after we
--
2.14.2
@ -0,0 +1,86 @@
From b5143e55b3bf018b3ad2598e677ceb5e155eba6f Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:49 -0700
Subject: [PATCH 044/231] x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware
code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Putting the logical ASID into CR3's PCID bits directly means that we
have two cases to consider separately: ASID == 0 and ASID != 0.
This means that bugs that only hit in one of these cases trigger
nondeterministically.

There were some bugs like this in the past, and I think there's
still one in current kernels. In particular, we have a number of
ASID-unaware code paths that save CR3, write some special value, and
then restore CR3. This includes suspend/resume, hibernate, kexec,
EFI, and maybe other things I've missed. This is currently
dangerous: if ASID != 0, then this code sequence will leave garbage
in the TLB tagged for ASID 0. We could potentially see corruption
when switching back to ASID 0. In principle, an
initialize_tlbstate_and_flush() call after these sequences would
solve the problem, but EFI, at least, does not call this. (And it
probably shouldn't -- initialize_tlbstate_and_flush() is rather
expensive.)

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/cdc14bbe5d3c3ef2a562be09a6368ffe9bd947a6.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 52a2af400c1075219b3f0ce5c96fc961da44018a)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 15e474753e66e44da1365049f465427053a453ba)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index a999ba6b721f..c120b5db178a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,14 +286,31 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}

+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits. This serves two purposes. It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero. It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
{
- return __sme_pa(mm->pgd) | asid;
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(mm->pgd);
+ }
}

static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
{
- return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
}

/*
--
2.14.2
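Note: a small sketch of the ASID-to-PCID mapping this patch adopts, restated in user-space C. kern_asid_to_pcid() is a name invented for the example; the point is that hardware PCID 0 stays reserved for PCID-unaware code, so logical ASID n is carried as PCID n + 1.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t kern_asid_to_pcid(uint16_t asid)
{
	assert(asid <= 4094);	/* 4095 would overflow the 12 PCID bits */
	return asid + 1;	/* PCID 0 stays reserved */
}

int main(void)
{
	for (uint16_t a = 0; a < 6; a++)
		printf("ASID %u -> PCID %u\n", (unsigned)a,
		       (unsigned)kern_asid_to_pcid(a));
	return 0;
}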
@ -0,0 +1,401 @@
From d1ffadc67e2eee2d5f8626dca6646e70e3aa9d76 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 9 Oct 2017 09:50:49 -0700
Subject: [PATCH 045/231] x86/mm: Flush more aggressively in lazy TLB mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Since commit:

94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking")

x86's lazy TLB mode has been all the way lazy: when running a kernel thread
(including the idle thread), the kernel keeps using the last user mm's
page tables without attempting to maintain user TLB coherence at all.

From a pure semantic perspective, this is fine -- kernel threads won't
attempt to access user pages, so having stale TLB entries doesn't matter.

Unfortunately, I forgot about a subtlety. By skipping TLB flushes,
we also allow any paging-structure caches that may exist on the CPU
to become incoherent. This means that we can have a
paging-structure cache entry that references a freed page table, and
the CPU is within its rights to do a speculative page walk starting
at the freed page table.

I can imagine this causing two different problems:

- A speculative page walk starting from a bogus page table could read
IO addresses. I haven't seen any reports of this causing problems.

- A speculative page walk that involves a bogus page table can install
garbage in the TLB. Such garbage would always be at a user VA, but
some AMD CPUs have logic that triggers a machine check when it notices
these bogus entries. I've seen a couple reports of this.

Boris further explains the failure mode:

> It is actually more of an optimization which assumes that paging-structure
> entries are in WB DRAM:
>
> "TlbCacheDis: cacheable memory disable. Read-write. 0=Enables
> performance optimization that assumes PML4, PDP, PDE, and PTE entries
> are in cacheable WB-DRAM; memory type checks may be bypassed, and
> addresses outside of WB-DRAM may result in undefined behavior or NB
> protocol errors. 1=Disables performance optimization and allows PML4,
> PDP, PDE and PTE entries to be in any memory type. Operating systems
> that maintain page tables in memory types other than WB- DRAM must set
> TlbCacheDis to insure proper operation."
>
> The MCE generated is an NB protocol error to signal that
>
> "Link: A specific coherent-only packet from a CPU was issued to an
> IO link. This may be caused by software which addresses page table
> structures in a memory type other than cacheable WB-DRAM without
> properly configuring MSRC001_0015[TlbCacheDis]. This may occur, for
> example, when page table structure addresses are above top of memory. In
> such cases, the NB will generate an MCE if it sees a mismatch between
> the memory operation generated by the core and the link type."
>
> I'm assuming coherent-only packets don't go out on IO links, thus the
> error.

To fix this, reinstate TLB coherence in lazy mode. With this patch
applied, we do it in one of two ways:

- If we have PCID, we simply switch back to init_mm's page tables
when we enter a kernel thread -- this seems to be quite cheap
except for the cost of serializing the CPU.

- If we don't have PCID, then we set a flag and switch to init_mm
the first time we would otherwise need to flush the TLB.

The /sys/kernel/debug/x86/tlb_use_lazy_mode debug switch can be changed
to override the default mode for benchmarking.

In theory, we could optimize this better by only flushing the TLB in
lazy CPUs when a page table is freed. Doing that would require
auditing the mm code to make sure that all page table freeing goes
through tlb_remove_page() as well as reworking some data structures
to implement the improved flush logic.

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Reported-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Johannes Hirte <johannes.hirte@datenkhaos.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking")
Link: http://lkml.kernel.org/r/20171009170231.fkpraqokz6e4zeco@pd.tnic
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit b956575bed91ecfb136a8300742ecbbf451471ab)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a4bb9409c548ece51ec246fc5113a32b8d130142)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/mmu_context.h | 8 +-
arch/x86/include/asm/tlbflush.h | 24 ++++++
arch/x86/mm/tlb.c | 160 +++++++++++++++++++++++++------------
3 files changed, 136 insertions(+), 56 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c120b5db178a..3c856a15b98e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
DEBUG_LOCKS_WARN_ON(preemptible());
}

-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
- int cpu = smp_processor_id();
-
- if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
- cpumask_clear_cpu(cpu, mm_cpumask(mm));
-}
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index d23e61dc0640..6533da3036c9 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -82,6 +82,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif

+/*
+ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
+ * to init_mm when we switch to a kernel thread (e.g. the idle thread). If
+ * it's false, then we immediately switch CR3 when entering a kernel thread.
+ */
+DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
/*
* 6 because 6 should be plenty and struct tlb_state will fit in
* two cache lines.
@@ -104,6 +111,23 @@ struct tlb_state {
u16 loaded_mm_asid;
u16 next_asid;

+ /*
+ * We can be in one of several states:
+ *
+ * - Actively using an mm. Our CPU's bit will be set in
+ * mm_cpumask(loaded_mm) and is_lazy == false;
+ *
+ * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
+ * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+ *
+ * - Lazily using a real mm. loaded_mm != &init_mm, our bit
+ * is set in mm_cpumask(loaded_mm), but is_lazy == true.
+ * We're heuristically guessing that the CR3 load we
+ * skipped more than makes up for the overhead added by
+ * lazy mode.
+ */
+ bool is_lazy;
+
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 440400316c8a..b27aceaf7ed1 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -30,6 +30,8 @@

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

+DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
u16 *new_asid, bool *need_flush)
{
@@ -80,7 +82,7 @@ void leave_mm(int cpu)
return;

/* Warn if we're not lazy. */
- WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
+ WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));

switch_mm(NULL, &init_mm, NULL);
}
@@ -140,52 +142,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
__flush_tlb_all();
}
#endif
+ this_cpu_write(cpu_tlbstate.is_lazy, false);

if (real_prev == next) {
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);

- if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
- /*
- * There's nothing to do: we weren't lazy, and we
- * aren't changing our mm. We don't need to flush
- * anything, nor do we need to update CR3, CR4, or
- * LDTR.
- */
- return;
- }
-
- /* Resume remote flushes and then read tlb_gen. */
- cpumask_set_cpu(cpu, mm_cpumask(next));
- next_tlb_gen = atomic64_read(&next->context.tlb_gen);
-
- if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
- next_tlb_gen) {
- /*
- * Ideally, we'd have a flush_tlb() variant that
- * takes the known CR3 value as input. This would
- * be faster on Xen PV and on hypothetical CPUs
- * on which INVPCID is fast.
- */
- this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
- next_tlb_gen);
- write_cr3(build_cr3(next, prev_asid));
-
- /*
- * This gets called via leave_mm() in the idle path
- * where RCU functions differently. Tracing normally
- * uses RCU, so we have to call the tracepoint
- * specially here.
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
- }
-
/*
- * We just exited lazy mode, which means that CR4 and/or LDTR
- * may be stale. (Changes to the required CR4 and LDTR states
- * are not reflected in tlb_gen.)
+ * We don't currently support having a real mm loaded without
+ * our cpu set in mm_cpumask(). We have all the bookkeeping
+ * in place to figure out whether we would need to flush
+ * if our cpu were cleared in mm_cpumask(), but we don't
+ * currently use it.
*/
+ if (WARN_ON_ONCE(real_prev != &init_mm &&
+ !cpumask_test_cpu(cpu, mm_cpumask(next))))
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ return;
} else {
u16 new_asid;
bool need_flush;
@@ -204,10 +178,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
}

/* Stop remote flushes for the previous mm */
- if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
- cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
-
- VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+ real_prev != &init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

/*
* Start remote flushes and then read tlb_gen.
@@ -237,6 +210,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_ldt(real_prev, next);
}

+/*
+ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
+ * kernel thread or other context without an mm. Acceptable implementations
+ * include doing nothing whatsoever, switching to init_mm, or various clever
+ * lazy tricks to try to minimize TLB flushes.
+ *
+ * The scheduler reserves the right to call enter_lazy_tlb() several times
+ * in a row. It will notify us that we're going back to a real mm by
+ * calling switch_mm_irqs_off().
+ */
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
+ return;
+
+ if (static_branch_unlikely(&tlb_use_lazy_mode)) {
+ /*
+ * There's a significant optimization that may be possible
+ * here. We have accurate enough TLB flush tracking that we
+ * don't need to maintain coherence of TLB per se when we're
+ * lazy. We do, however, need to maintain coherence of
+ * paging-structure caches. We could, in principle, leave our
+ * old mm loaded and only switch to init_mm when
+ * tlb_remove_page() happens.
+ */
+ this_cpu_write(cpu_tlbstate.is_lazy, true);
+ } else {
+ switch_mm(NULL, &init_mm, NULL);
+ }
+}
+
/*
* Call this when reinitializing a CPU. It fixes the following potential
* problems:
@@ -308,16 +312,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());

+ if (unlikely(loaded_mm == &init_mm))
+ return;
+
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);

- if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+ if (this_cpu_read(cpu_tlbstate.is_lazy)) {
/*
- * We're in lazy mode -- don't flush. We can get here on
- * remote flushes due to races and on local flushes if a
- * kernel thread coincidentally flushes the mm it's lazily
- * still using.
+ * We're in lazy mode. We need to at least flush our
+ * paging-structure cache to avoid speculatively reading
+ * garbage into our TLB. Since switching to init_mm is barely
+ * slower than a minimal flush, just switch to init_mm.
*/
+ switch_mm_irqs_off(NULL, &init_mm, NULL);
return;
}

@@ -616,3 +624,57 @@ static int __init create_tlb_single_page_flush_ceiling(void)
return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
+
+static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buf[2];
+
+ buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
+ buf[1] = '\n';
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t tlblazy_write_file(struct file *file,
+ const char __user *user_buf, size_t count, loff_t *ppos)
+{
+ bool val;
+
+ if (kstrtobool_from_user(user_buf, count, &val))
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&tlb_use_lazy_mode);
+ else
+ static_branch_disable(&tlb_use_lazy_mode);
+
+ return count;
+}
+
+static const struct file_operations fops_tlblazy = {
+ .read = tlblazy_read_file,
+ .write = tlblazy_write_file,
+ .llseek = default_llseek,
+};
+
+static int __init init_tlb_use_lazy_mode(void)
+{
+ if (boot_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * Heuristic: with PCID on, switching to and from
+ * init_mm is reasonably fast, but remote flush IPIs
+ * as expensive as ever, so turn off lazy TLB mode.
+ *
+ * We can't do this in setup_pcid() because static keys
+ * haven't been initialized yet, and it would blow up
+ * badly.
+ */
+ static_branch_disable(&tlb_use_lazy_mode);
+ }
+
+ debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
+ arch_debugfs_dir, NULL, &fops_tlblazy);
+ return 0;
+}
+late_initcall(init_tlb_use_lazy_mode);
--
2.14.2
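Note: a loose user-space model of the three per-CPU states described in the tlb_state comment above (active mm, init_mm, lazily-held mm). The names and the have_pcid switch are illustrative assumptions, not the kernel implementation; the real code calls switch_mm() rather than assigning a pointer.

#include <stdbool.h>
#include <stdio.h>

struct mm { const char *name; };
static struct mm init_mm = { "init_mm" };

static struct { struct mm *loaded_mm; bool is_lazy; } cpu_state =
	{ &init_mm, false };

/* Scheduler hint: this CPU is entering a kernel thread. With PCID the
 * switch to init_mm is cheap, so do it eagerly; without PCID just mark
 * the CPU lazy and defer the switch until a flush actually arrives. */
static void enter_lazy_tlb(bool have_pcid)
{
	if (cpu_state.loaded_mm == &init_mm)
		return;
	if (have_pcid)
		cpu_state.loaded_mm = &init_mm;	/* eager, cheap CR3 switch */
	else
		cpu_state.is_lazy = true;	/* lazy: flush will switch us */
}

int main(void)
{
	struct mm user = { "user_mm" };

	cpu_state.loaded_mm = &user;
	enter_lazy_tlb(false);
	printf("loaded=%s lazy=%d\n", cpu_state.loaded_mm->name,
	       (int)cpu_state.is_lazy);
	return 0;
}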
@ -0,0 +1,101 @@
From 1ef06119163f106fc0de4990e7ae559e9a5a8169 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sat, 4 Nov 2017 04:16:12 -0700
Subject: [PATCH 046/231] Revert "x86/mm: Stop calling leave_mm() in idle code"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This reverts commit 43858b4f25cf0adc5c2ca9cf5ce5fdf2532941e5.

The reason I removed the leave_mm() calls in question is because the
heuristic wasn't needed after that patch. With the original version
of my PCID series, we never flushed a "lazy cpu" (i.e. a CPU running
kernel thread) due to a flush on the loaded mm.

Unfortunately, that caused architectural issues, so now I've
reinstated these flushes on non-PCID systems in:

commit b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode").

That, in turn, gives us a power management and occasionally
performance regression as compared to old kernels: a process that
goes into a deep idle state on a given CPU and gets its mm flushed
due to activity on a different CPU will wake the idle CPU.

Reinstate the old ugly heuristic: if a CPU goes into ACPI C3 or an
intel_idle state that is likely to cause a TLB flush, it gets its mm
switched to init_mm before going idle.

FWIW, this heuristic is lousy. Whether we should change CR3 before
idle isn't a good hint except insofar as the performance hit is a bit
lower if the TLB is getting flushed by the idle code anyway. What we
really want to know is whether we anticipate being idle long enough
that the mm is likely to be flushed before we wake up. This is more a
matter of the expected latency than the idle state that gets chosen.
This heuristic also completely fails on systems that don't know
whether the TLB will be flushed (e.g. AMD systems?). OTOH it may be a
bit obsolete anyway -- PCID systems don't presently benefit from this
heuristic at all.

We also shouldn't do this callback from the innermost bit of the idle
code due to the RCU nastiness it causes. All the information needed is
available before rcu_idle_enter() needs to happen.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 43858b4f25cf "x86/mm: Stop calling leave_mm() in idle code"
Link: http://lkml.kernel.org/r/c513bbd4e653747213e05bc7062de000bf0202a5.1509793738.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 675357362aeba19688440eb1aaa7991067f73b12)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b607843145fd0593fcd87e2596d1dc5a1d5f79a5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/mm/tlb.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index b27aceaf7ed1..ed06f1593390 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -194,12 +194,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
write_cr3(build_cr3(next, new_asid));
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
+
+ /*
+ * NB: This gets called via leave_mm() in the idle path
+ * where RCU functions differently. Tracing normally
+ * uses RCU, so we need to use the _rcuidle variant.
+ *
+ * (There is no good reason for this. The idle code should
+ * be rearranged to call this before rcu_idle_enter().)
+ */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
write_cr3(build_cr3_noflush(next, new_asid));
+
+ /* See above wrt _rcuidle. */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}

this_cpu_write(cpu_tlbstate.loaded_mm, next);
--
2.14.2
@ -0,0 +1,86 @@
From f2c1440e8f0b728d48ee8ce295f4dfe495949e1f Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 3 Oct 2017 08:51:43 -0500
Subject: [PATCH 047/231] kprobes/x86: Set up frame pointer in kprobe
trampoline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Richard Weinberger saw an unwinder warning when running bcc's opensnoop:

WARNING: kernel stack frame pointer at ffff99ef4076bea0 in opensnoop:2008 has bad value 0000000000000008
unwind stack type:0 next_sp: (null) mask:0x2 graph_idx:0
...
ffff99ef4076be88: ffff99ef4076bea0 (0xffff99ef4076bea0)
ffff99ef4076be90: ffffffffac442721 (optimized_callback +0x81/0x90)
...

A lockdep stack trace was initiated from inside a kprobe handler, when
the unwinder noticed a bad frame pointer on the stack. The bad frame
pointer is related to the fact that the kprobe optprobe trampoline
doesn't save the frame pointer before calling into optimized_callback().

Reported-and-tested-by: Richard Weinberger <richard@sigma-star.at>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S . Miller <davem@davemloft.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/7aef2f8ecd75c2f505ef9b80490412262cf4a44c.1507038547.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit ee213fc72fd67d0988525af501534f4cb924d1e9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0f7d5518c91335584b16c7bed1c54c10b78ea76a)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/kprobes/common.h | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index db2182d63ed0..3fc0f9a794cb 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -3,6 +3,15 @@

/* Kprobes and Optprobes common header */

+#include <asm/asm.h>
+
+#ifdef CONFIG_FRAME_POINTER
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
+ " mov %" _ASM_SP ", %" _ASM_BP "\n"
+#else
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
+#endif
+
#ifdef CONFIG_X86_64
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
@@ -17,7 +26,7 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- " pushq %rbp\n" \
+ SAVE_RBP_STRING \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
@@ -48,7 +57,7 @@
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- " pushl %ebp\n" \
+ SAVE_RBP_STRING \
" pushl %edi\n" \
" pushl %esi\n" \
" pushl %edx\n" \
--
2.14.2
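Note: a stand-alone sketch of what SAVE_RBP_STRING resolves to on x86-64, shown as a runnable preprocessor example rather than kernel code; CONFIG_FRAME_POINTER is assumed to be the only knob, and the register spelling is hard-coded for illustration.

#include <stdio.h>

#ifdef CONFIG_FRAME_POINTER
/* Build a conventional frame so unwinders can walk past the trampoline. */
# define SAVE_RBP_STRING "\tpush %rbp\n" \
			 "\tmov %rsp, %rbp\n"
#else
/* Still push RBP to keep the pt_regs layout intact, but build no frame. */
# define SAVE_RBP_STRING "\tpush %rbp\n"
#endif

int main(void)
{
	fputs("trampoline prologue:\n" SAVE_RBP_STRING, stdout);
	return 0;
}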
@ -0,0 +1,140 @@
From 179faefa769caa263bc88b1f7292be7a60df4298 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Aug 2017 08:47:21 +0200
Subject: [PATCH 048/231] x86/tracing: Introduce a static key for exception
tracing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Switching the IDT just for avoiding tracepoints creates a completely
impenetrable macro/inline/ifdef mess.

There is no point in avoiding tracepoints for most of the traps/exceptions.
For the more expensive tracepoints, like pagefaults, this can be handled with
an explicit static key.

Preparatory patch to remove the tracing IDT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20170828064956.593094539@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 2feb1b316d48004d905278c02a55902cab0be8be)
Signed-off-by: Andy Whitcroft <apw@kathleen.maas>
(cherry picked from commit 15e0ff2a63fdd93f8881e2ebba5c048c5b601e57)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d58a56e851c339d8d9d311dc9b4fad6abbf8bf19)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/trace/common.h | 15 +++++++++++++++
arch/x86/include/asm/trace/exceptions.h | 4 +---
arch/x86/include/asm/trace/irq_vectors.h | 4 +---
arch/x86/kernel/tracepoint.c | 9 ++++++++-
4 files changed, 25 insertions(+), 7 deletions(-)
create mode 100644 arch/x86/include/asm/trace/common.h

diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h
new file mode 100644
index 000000000000..b1eb7b18ee8a
--- /dev/null
+++ b/arch/x86/include/asm/trace/common.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_TRACE_COMMON_H
+#define _ASM_TRACE_COMMON_H
+
+extern int trace_irq_vector_regfunc(void);
+extern void trace_irq_vector_unregfunc(void);
+
+#ifdef CONFIG_TRACING
+DECLARE_STATIC_KEY_FALSE(trace_irqvectors_key);
+#define trace_irqvectors_enabled() \
+ static_branch_unlikely(&trace_irqvectors_key)
+#else
+static inline bool trace_irqvectors_enabled(void) { return false; }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 2422b14c50a7..960a5b50ac3b 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -5,9 +5,7 @@
#define _TRACE_PAGE_FAULT_H

#include <linux/tracepoint.h>
-
-extern int trace_irq_vector_regfunc(void);
-extern void trace_irq_vector_unregfunc(void);
+#include <asm/trace/common.h>

DECLARE_EVENT_CLASS(x86_exceptions,

diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 32dd6a9e343c..7825b4426e7e 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -5,9 +5,7 @@
#define _TRACE_IRQ_VECTORS_H

#include <linux/tracepoint.h>
-
-extern int trace_irq_vector_regfunc(void);
-extern void trace_irq_vector_unregfunc(void);
+#include <asm/trace/common.h>

DECLARE_EVENT_CLASS(x86_irq_vector,

diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index 15515132bf0d..dd4aa04bb95c 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -4,9 +4,11 @@
* Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
*
*/
+#include <linux/jump_label.h>
+#include <linux/atomic.h>
+
#include <asm/hw_irq.h>
#include <asm/desc.h>
-#include <linux/atomic.h>

atomic_t trace_idt_ctr = ATOMIC_INIT(0);
struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
@@ -15,6 +17,7 @@ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
/* No need to be aligned, but done to keep all IDTs defined the same way. */
gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;

+DEFINE_STATIC_KEY_FALSE(trace_irqvectors_key);
static int trace_irq_vector_refcount;
static DEFINE_MUTEX(irq_vector_mutex);

@@ -36,6 +39,8 @@ static void switch_idt(void *arg)

int trace_irq_vector_regfunc(void)
{
+ static_branch_inc(&trace_irqvectors_key);
+
mutex_lock(&irq_vector_mutex);
if (!trace_irq_vector_refcount) {
set_trace_idt_ctr(1);
@@ -49,6 +54,8 @@ int trace_irq_vector_regfunc(void)

void trace_irq_vector_unregfunc(void)
{
+ static_branch_dec(&trace_irqvectors_key);
+
mutex_lock(&irq_vector_mutex);
trace_irq_vector_refcount--;
if (!trace_irq_vector_refcount) {
--
2.14.2
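Note: the static-key pattern this patch introduces, modeled in portable user-space C. A real jump label patches the instruction stream at register/unregister time; a bool plus __builtin_expect() is only a stand-in for illustration, and the handler below is invented for the example.

#include <stdbool.h>
#include <stdio.h>

static bool trace_irqvectors_key;	/* flipped by register/unregister */

static inline bool trace_irqvectors_enabled(void)
{
	/* Likely-false branch: near-free when tracing is off. */
	return __builtin_expect(trace_irqvectors_key, 0);
}

static void handle_page_fault(unsigned long addr)
{
	if (trace_irqvectors_enabled())
		printf("trace: page fault at %#lx\n", addr);
	/* ... actual fault handling would continue here ... */
}

int main(void)
{
	handle_page_fault(0xdead000UL);	/* tracepoint unregistered: silent */
	trace_irqvectors_key = true;	/* tracepoint registered */
	handle_page_fault(0xbeef000UL);	/* now traced */
	return 0;
}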
@ -0,0 +1,189 @@
|
||||
From 0b7f51014f5219ece1ca55662495bd036f3bd00d Mon Sep 17 00:00:00 2001
|
||||
From: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Mon, 17 Jul 2017 16:10:33 -0500
|
||||
Subject: [PATCH 049/231] x86/boot: Add early cmdline parsing for options with
|
||||
arguments
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add a cmdline_find_option() function to look for cmdline options that
take arguments. The argument is returned in a supplied buffer and the
argument length (regardless of whether it fits in the supplied buffer)
is returned, with -1 indicating not found.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit e505371dd83963caae1a37ead9524e8d997341be)
Signed-off-by: Andy Whitcroft <apw@kathleen.maas>
(cherry picked from commit 37569cd003aa69a57d5666530436c2d973a57b26)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b9f03418aa9b8ecbb1c7f32ac2bfe68fd21de4f5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/cmdline.h |   2 +
 arch/x86/lib/cmdline.c         | 105 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index e01f7f7ccb0c..84ae170bc3d0 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,5 +2,7 @@
 #define _ASM_X86_CMDLINE_H

 int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
+int cmdline_find_option(const char *cmdline_ptr, const char *option,
+                        char *buffer, int bufsize);

 #endif /* _ASM_X86_CMDLINE_H */
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 5cc78bf57232..3261abb21ef4 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
         return 0;       /* Buffer overrun */
 }

+/*
+ * Find a non-boolean option (i.e. option=argument). In accordance with
+ * standard Linux practice, if this option is repeated, this returns the
+ * last instance on the command line.
+ *
+ * @cmdline: the cmdline string
+ * @max_cmdline_size: the maximum size of cmdline
+ * @option: option string to look for
+ * @buffer: memory buffer to return the option argument
+ * @bufsize: size of the supplied memory buffer
+ *
+ * Returns the length of the argument (regardless of if it was
+ * truncated to fit in the buffer), or -1 on not found.
+ */
+static int
+__cmdline_find_option(const char *cmdline, int max_cmdline_size,
+                      const char *option, char *buffer, int bufsize)
+{
+        char c;
+        int pos = 0, len = -1;
+        const char *opptr = NULL;
+        char *bufptr = buffer;
+        enum {
+                st_wordstart = 0,       /* Start of word/after whitespace */
+                st_wordcmp,             /* Comparing this word */
+                st_wordskip,            /* Miscompare, skip */
+                st_bufcpy,              /* Copying this to buffer */
+        } state = st_wordstart;
+
+        if (!cmdline)
+                return -1;      /* No command line */
+
+        /*
+         * This 'pos' check ensures we do not overrun
+         * a non-NULL-terminated 'cmdline'
+         */
+        while (pos++ < max_cmdline_size) {
+                c = *(char *)cmdline++;
+                if (!c)
+                        break;
+
+                switch (state) {
+                case st_wordstart:
+                        if (myisspace(c))
+                                break;
+
+                        state = st_wordcmp;
+                        opptr = option;
+                        /* fall through */
+
+                case st_wordcmp:
+                        if ((c == '=') && !*opptr) {
+                                /*
+                                 * We matched all the way to the end of the
+                                 * option we were looking for, prepare to
+                                 * copy the argument.
+                                 */
+                                len = 0;
+                                bufptr = buffer;
+                                state = st_bufcpy;
+                                break;
+                        } else if (c == *opptr++) {
+                                /*
+                                 * We are currently matching, so continue
+                                 * to the next character on the cmdline.
+                                 */
+                                break;
+                        }
+                        state = st_wordskip;
+                        /* fall through */
+
+                case st_wordskip:
+                        if (myisspace(c))
+                                state = st_wordstart;
+                        break;
+
+                case st_bufcpy:
+                        if (myisspace(c)) {
+                                state = st_wordstart;
+                        } else {
+                                /*
+                                 * Increment len, but don't overrun the
+                                 * supplied buffer and leave room for the
+                                 * NULL terminator.
+                                 */
+                                if (++len < bufsize)
+                                        *bufptr++ = c;
+                        }
+                        break;
+                }
+        }
+
+        if (bufsize)
+                *bufptr = '\0';
+
+        return len;
+}
+
 int cmdline_find_option_bool(const char *cmdline, const char *option)
 {
         return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
 }
+
+int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
+                        int bufsize)
+{
+        return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
+                                     buffer, bufsize);
+}
--
2.14.2
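A usage sketch (not part of the patch): the helper name and the
"mem_encrypt" option below are assumptions chosen only to illustrate the
return-value contract described above — the full argument length comes
back even when the buffer is too small, and -1 means the option is absent.

        #include <linux/string.h>
        #include <asm/cmdline.h>

        /* hypothetical helper, for illustration only */
        static bool example_mem_encrypt_off(const char *cmdline)
        {
                char buffer[16];
                int len;

                /* len is the full argument length, or -1 when absent */
                len = cmdline_find_option(cmdline, "mem_encrypt",
                                          buffer, sizeof(buffer));
                return len == 3 && !strncmp(buffer, "off", 3);
        }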
@ -0,0 +1,192 @@
From 7c5d42f31bf68647dd00ac2fef9057d113e8072d Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Sat, 9 Sep 2017 00:56:03 +0300
Subject: [PATCH 050/231] mm, x86/mm: Fix performance regression in
 get_user_pages_fast()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The 0-day test bot found a performance regression that was tracked down to
switching x86 to the generic get_user_pages_fast() implementation:

  http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop

The regression was caused by the fact that we now use local_irq_save() +
local_irq_restore() in get_user_pages_fast() to disable interrupts.
In x86 implementation local_irq_disable() + local_irq_enable() was used.

The fix is to make get_user_pages_fast() use local_irq_disable(),
leaving local_irq_save() for __get_user_pages_fast() that can be called
with interrupts disabled.

Numbers for pinning a gigabyte of memory, one page a time, 20 repeats:

  Before:  Average: 14.91 ms, stddev: 0.45 ms
  After:   Average: 10.76 ms, stddev: 0.18 ms

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: linux-mm@kvack.org
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 5241f4b2c68284612e34910305f3234e4a64701b)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 mm/gup.c | 97 ++++++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 58 insertions(+), 39 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 23f01c40c88f..4a789f1c6a27 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
         return 1;
 }

+static void gup_pgd_range(unsigned long addr, unsigned long end,
+                int write, struct page **pages, int *nr)
+{
+        unsigned long next;
+        pgd_t *pgdp;
+
+        pgdp = pgd_offset(current->mm, addr);
+        do {
+                pgd_t pgd = READ_ONCE(*pgdp);
+
+                next = pgd_addr_end(addr, end);
+                if (pgd_none(pgd))
+                        return;
+                if (unlikely(pgd_huge(pgd))) {
+                        if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+                                          pages, nr))
+                                return;
+                } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+                        if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+                                         PGDIR_SHIFT, next, write, pages, nr))
+                                return;
+                } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+                        return;
+        } while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+        unsigned long len, end;
+
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+        end = start + len;
+        return end >= start;
+}
+#endif
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                           struct page **pages)
 {
-        struct mm_struct *mm = current->mm;
         unsigned long addr, len, end;
-        unsigned long next, flags;
-        pgd_t *pgdp;
+        unsigned long flags;
         int nr = 0;

         start &= PAGE_MASK;
@@ -1652,45 +1691,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
          * block IPIs that come from THPs splitting.
          */

-        local_irq_save(flags);
-        pgdp = pgd_offset(mm, addr);
-        do {
-                pgd_t pgd = READ_ONCE(*pgdp);
-
-                next = pgd_addr_end(addr, end);
-                if (pgd_none(pgd))
-                        break;
-                if (unlikely(pgd_huge(pgd))) {
-                        if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-                                          pages, &nr))
-                                break;
-                } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-                        if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-                                         PGDIR_SHIFT, next, write, pages, &nr))
-                                break;
-                } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-                        break;
-        } while (pgdp++, addr = next, addr != end);
-        local_irq_restore(flags);
+        if (gup_fast_permitted(start, nr_pages, write)) {
+                local_irq_save(flags);
+                gup_pgd_range(addr, end, write, pages, &nr);
+                local_irq_restore(flags);
+        }

         return nr;
 }

-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
-        unsigned long len, end;
-
-        len = (unsigned long) nr_pages << PAGE_SHIFT;
-        end = start + len;
-        return end >= start;
-}
-#endif
-
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:      starting user address
@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                         struct page **pages)
 {
+        unsigned long addr, len, end;
         int nr = 0, ret = 0;

         start &= PAGE_MASK;
+        addr = start;
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+        end = start + len;
+
+        if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+                                        (void __user *)start, len)))
+                return 0;

         if (gup_fast_permitted(start, nr_pages, write)) {
-                nr = __get_user_pages_fast(start, nr_pages, write, pages);
+                local_irq_disable();
+                gup_pgd_range(addr, end, write, pages, &nr);
+                local_irq_enable();
                 ret = nr;
         }

--
2.14.2
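A side note on gup_fast_permitted() above (a sketch, not part of the
patch): the "end >= start" test works because the unsigned length
arithmetic wraps on overflow, so a range that runs past the top of the
address space is rejected before the fast path is entered. A standalone
userspace illustration:

        #include <stdio.h>

        int main(void)
        {
                unsigned long start = ~0UL - 4095;      /* last page of the address space */
                unsigned long len = 2UL << 12;          /* two pages */
                unsigned long end = start + len;        /* wraps around to a small value */

                /* end < start signals wraparound, so the fast path refuses the range */
                printf("permitted: %s\n", end >= start ? "yes" : "no");
                return 0;
        }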
@ -0,0 +1,149 @@
From 7d7ea8398b5f0cf22b8faec46c95543031c5fe94 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Wed, 6 Sep 2017 17:18:08 +0200
Subject: [PATCH 051/231] x86/asm: Remove unnecessary \n\t in front of CC_SET()
 from asm templates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

There is no need for \n\t in front of CC_SET(), as the macro already includes these two.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170906151808.5634-1-ubizjak@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 3c52b5c64326d9dcfee4e10611c53ec1b1b20675)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 1c3f29ec5586e3aecfde2c6f83b8786e1aecd9ac)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/archrandom.h |  8 ++++----
 arch/x86/include/asm/bitops.h     | 10 +++++-----
 arch/x86/include/asm/percpu.h     |  2 +-
 arch/x86/include/asm/rmwcc.h      |  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 5b0579abb398..3ac991d81e74 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
         bool ok;
         unsigned int retry = RDRAND_RETRY_LOOPS;
         do {
-                asm volatile(RDRAND_LONG "\n\t"
+                asm volatile(RDRAND_LONG
                              CC_SET(c)
                              : CC_OUT(c) (ok), "=a" (*v));
                 if (ok)
@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
         bool ok;
         unsigned int retry = RDRAND_RETRY_LOOPS;
         do {
-                asm volatile(RDRAND_INT "\n\t"
+                asm volatile(RDRAND_INT
                              CC_SET(c)
                              : CC_OUT(c) (ok), "=a" (*v));
                 if (ok)
@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
 static inline bool rdseed_long(unsigned long *v)
 {
         bool ok;
-        asm volatile(RDSEED_LONG "\n\t"
+        asm volatile(RDSEED_LONG
                      CC_SET(c)
                      : CC_OUT(c) (ok), "=a" (*v));
         return ok;
@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
 static inline bool rdseed_int(unsigned int *v)
 {
         bool ok;
-        asm volatile(RDSEED_INT "\n\t"
+        asm volatile(RDSEED_INT
                      CC_SET(c)
                      : CC_OUT(c) (ok), "=a" (*v));
         return ok;
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 854022772c5b..8cee8db6dffb 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -142,7 +142,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
 {
         bool negative;
-        asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
+        asm volatile(LOCK_PREFIX "andb %2,%1"
                 CC_SET(s)
                 : CC_OUT(s) (negative), ADDR
                 : "ir" ((char) ~(1 << nr)) : "memory");
@@ -245,7 +245,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
 {
         bool oldbit;

-        asm("bts %2,%1\n\t"
+        asm("bts %2,%1"
             CC_SET(c)
             : CC_OUT(c) (oldbit), ADDR
             : "Ir" (nr));
@@ -285,7 +285,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
 {
         bool oldbit;

-        asm volatile("btr %2,%1\n\t"
+        asm volatile("btr %2,%1"
                      CC_SET(c)
                      : CC_OUT(c) (oldbit), ADDR
                      : "Ir" (nr));
@@ -297,7 +297,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
 {
         bool oldbit;

-        asm volatile("btc %2,%1\n\t"
+        asm volatile("btc %2,%1"
                      CC_SET(c)
                      : CC_OUT(c) (oldbit), ADDR
                      : "Ir" (nr) : "memory");
@@ -328,7 +328,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
 {
         bool oldbit;

-        asm volatile("bt %2,%1\n\t"
+        asm volatile("bt %2,%1"
                      CC_SET(c)
                      : CC_OUT(c) (oldbit)
                      : "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 9fa03604b2b3..b21a475fd7ed 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -525,7 +525,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
 {
         bool oldbit;

-        asm volatile("bt "__percpu_arg(2)",%1\n\t"
+        asm volatile("bt "__percpu_arg(2)",%1"
                         CC_SET(c)
                         : CC_OUT(c) (oldbit)
                         : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 661dd305694a..dd7ba5aa8dca 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -28,7 +28,7 @@ cc_label:                                                               \
 #define __GEN_RMWcc(fullop, var, cc, ...)                               \
 do {                                                                    \
         bool c;                                                         \
-        asm volatile (fullop ";" CC_SET(cc)                             \
+        asm volatile (fullop CC_SET(cc)                                 \
                         : "+m" (var), CC_OUT(cc) (c)                    \
                         : __VA_ARGS__ : "memory");                      \
         return c;                                                       \
--
2.14.2
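For context (a sketch, not part of the patch): CC_SET()/CC_OUT() live in
arch/x86/include/asm/asm.h, and both variants of CC_SET() open with the
"\n\t" that the removed literals duplicated. The definitions of this era
read approximately as follows; treat the exact text as an assumption:

        /*
         * Macros to generate condition code outputs from inline assembly,
         * the output operand must be of type "bool".
         */
        #ifdef __GCC_ASM_FLAG_OUTPUTS__
        # define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
        # define CC_OUT(c) "=@cc" #c
        #else
        # define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
        # define CC_OUT(c) [_cc_ ## c] "=qm"
        #endif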
@ -0,0 +1,58 @@
From 985d8e62ef5f1b006da5e175858e552c0dbda771 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 21:43:30 -0500
Subject: [PATCH 052/231] objtool: Don't report end of section error after an
 empty unwind hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

If asm code specifies an UNWIND_HINT_EMPTY hint, don't warn if the
section ends unexpectedly. This can happen with the xen-head.S code
because the hypercall_page is "text" but it's all zeros.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/ddafe199dd8797e40e3c2777373347eba1d65572.1505764066.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 00d96180dc38ef872ac471c2d3e14b067cbd895d)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 9d22f903bba24f2ac86de8a81dc1788f9957aca8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 tools/objtool/check.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 368275de5f23..0a86fd0ac082 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1652,11 +1652,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
                 if (insn->dead_end)
                         return 0;

-                insn = next_insn;
-                if (!insn) {
+                if (!next_insn) {
+                        if (state.cfa.base == CFI_UNDEFINED)
+                                return 0;
                         WARN("%s: unexpected end of section", sec->name);
                         return 1;
                 }
+
+                insn = next_insn;
         }

         return 0;
--
2.14.2
54
patches/kernel/0053-x86-head-Remove-confusing-comment.patch
Normal file
@ -0,0 +1,54 @@
From 109bbd8c905806e929b67ca0b2eaf57ff88f10c1 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 21:43:31 -0500
Subject: [PATCH 053/231] x86/head: Remove confusing comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This comment is actively wrong and confusing. It refers to the
registers' stack offsets after the pt_regs has been constructed on the
stack, but this code is *before* that.

At this point the stack just has the standard iret frame, for which no
comment should be needed.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/a3c267b770fc56c9b86df9c11c552848248aace2.1505764066.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 17270717e80de33a884ad328fea5f407d87f6d6a)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 49187e0108184688304260a75d29b789f36f3a2b)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/kernel/head_64.S | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6225550883df..627c798b2f15 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -254,10 +254,6 @@ bad_address:

         __INIT
 ENTRY(early_idt_handler_array)
-        # 104(%rsp) %rflags
-        #  96(%rsp) %cs
-        #  88(%rsp) %rip
-        #  80(%rsp) error code
         i = 0
         .rept NUM_EXCEPTION_VECTORS
         .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
--
2.14.2
@ -0,0 +1,48 @@
From 5ba2d2eca16a62a64166661ea849c4916ae2f44f Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 21:43:32 -0500
Subject: [PATCH 054/231] x86/head: Remove unused 'bad_address' code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

It's no longer possible for this code to be executed, so remove it.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/32a46fe92d2083700599b36872b26e7dfd7b7965.1505764066.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit a8b88e84d124bc92c4808e72b8b8c0e0bb538630)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d790ff35a3a49ef0942a3484f024551433fd2ddf)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/kernel/head_64.S | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 627c798b2f15..37d9905d38d6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -249,9 +249,6 @@ ENDPROC(start_cpu0)
         .quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
         __FINITDATA

-bad_address:
-        jmp bad_address
-
         __INIT
 ENTRY(early_idt_handler_array)
         i = 0
--
2.14.2
@ -0,0 +1,66 @@
From 2527d40adb84012c90cab350bd5ebbce65daaff7 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 21:43:33 -0500
Subject: [PATCH 055/231] x86/head: Fix head ELF function annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

These functions aren't callable C-type functions, so don't annotate them
as such.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/36eb182738c28514f8bf95e403d89b6413a88883.1505764066.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 015a2ea5478680fc5216d56b7ff306f2a74efaf9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 707517a56928fed1c03eefdb4e00fa57dfddc4fd)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/kernel/head_64.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 37d9905d38d6..45b18b1a6417 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -218,7 +218,7 @@ ENTRY(secondary_startup_64)
         pushq   %rax            # target address in negative space
         lretq
 .Lafter_lret:
-ENDPROC(secondary_startup_64)
+END(secondary_startup_64)

 #include "verify_cpu.S"

@@ -261,7 +261,7 @@ ENTRY(early_idt_handler_array)
         i = i + 1
         .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
         .endr
-ENDPROC(early_idt_handler_array)
+END(early_idt_handler_array)

 early_idt_handler_common:
         /*
@@ -304,7 +304,7 @@ early_idt_handler_common:
 20:
         decl early_recursion_flag(%rip)
         jmp restore_regs_and_iret
-ENDPROC(early_idt_handler_common)
+END(early_idt_handler_common)

         __INITDATA

--
2.14.2
@ -0,0 +1,53 @@
From 1b9783c7335f17e3f5bdb8776dd06de62dcfba81 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 21:43:34 -0500
Subject: [PATCH 056/231] x86/boot: Annotate verify_cpu() as a callable
 function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

verify_cpu() is a callable function. Annotate it as such.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/293024b8a080832075312f38c07ccc970fc70292.1505764066.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit e93db75a0054b23a874a12c63376753544f3fe9e)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 48a432c46026f864e194cdf9a8133e7c9109274e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/kernel/verify_cpu.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 014ea59aa153..3d3c2f71f617 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -33,7 +33,7 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>

-verify_cpu:
+ENTRY(verify_cpu)
         pushf                           # Save caller passed flags
         push    $0                      # Kill any dangerous flags
         popf
@@ -139,3 +139,4 @@ verify_cpu:
         popf                            # Restore caller passed flags
         xorl    %eax, %eax
         ret
+ENDPROC(verify_cpu)
--
2.14.2
@ -0,0 +1,56 @@
From 6f359bcacdf28ca9f6bfc29bd0aa4e22489eb34d Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 21:43:35 -0500
Subject: [PATCH 057/231] x86/xen: Fix xen head ELF annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Mark the ends of the startup_xen and hypercall_page code sections.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3a80a394d30af43d9cefa1a29628c45ed8420c97.1505764066.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 2582d3df95c76d3b686453baf90b64d57e87d1e8)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b9410861f1436c1e38958a9b85009ad252aad9f5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/xen/xen-head.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 72a8e6adebe6..2f0cff2cc265 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -33,7 +33,7 @@ ENTRY(startup_xen)
         mov $init_thread_union+THREAD_SIZE, %_ASM_SP

         jmp xen_start_kernel
-
+END(startup_xen)
         __FINIT
 #endif

@@ -47,7 +47,7 @@ ENTRY(hypercall_page)
         .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
 #include <asm/xen-hypercalls.h>
 #undef HYPERCALL
-
+END(hypercall_page)
 .popsection

 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
--
2.14.2
@ -0,0 +1,70 @@
From b90136e442c889a7344992acc34764729936ab92 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 21:43:36 -0500
Subject: [PATCH 058/231] x86/xen: Add unwind hint annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add unwind hint annotations to the xen head code so the ORC unwinder can
read head_64.o.

hypercall_page needs empty annotations at 32-byte intervals to match the
'xen_hypercall_*' ELF functions at those locations.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/70ed2eb516fe9266be766d953f93c2571bca88cc.1505764066.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit abbe1cac6214d81d2f4e149aba64a8760703144e)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 9f099a90cb39eaff9b3187e8a6d8151c8af53db1)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/xen/xen-head.S | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 2f0cff2cc265..ad189ab2c329 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -9,6 +9,7 @@
 #include <asm/boot.h>
 #include <asm/asm.h>
 #include <asm/page_types.h>
+#include <asm/unwind_hints.h>

 #include <xen/interface/elfnote.h>
 #include <xen/interface/features.h>
@@ -19,6 +20,7 @@
 #ifdef CONFIG_XEN_PV
         __INIT
 ENTRY(startup_xen)
+        UNWIND_HINT_EMPTY
         cld

         /* Clear .bss */
@@ -40,7 +42,10 @@ END(startup_xen)
 .pushsection .text
         .balign PAGE_SIZE
 ENTRY(hypercall_page)
-        .skip PAGE_SIZE
+        .rept (PAGE_SIZE / 32)
+                UNWIND_HINT_EMPTY
+                .skip 32
+        .endr

 #define HYPERCALL(n) \
         .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
--
2.14.2
134
patches/kernel/0059-x86-head-Add-unwind-hint-annotations.patch
Normal file
@ -0,0 +1,134 @@
From 6ef121f444bab6ac294e1eda62eb727ee639c6d7 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 21:43:37 -0500
Subject: [PATCH 059/231] x86/head: Add unwind hint annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Jiri Slaby reported an ORC issue when unwinding from an idle task. The
stack was:

  ffffffff811083c2 do_idle+0x142/0x1e0
  ffffffff8110861d cpu_startup_entry+0x5d/0x60
  ffffffff82715f58 start_kernel+0x3ff/0x407
  ffffffff827153e8 x86_64_start_kernel+0x14e/0x15d
  ffffffff810001bf secondary_startup_64+0x9f/0xa0

The ORC unwinder errored out at secondary_startup_64 because the head
code isn't annotated yet so there wasn't a corresponding ORC entry.

Fix that and any other head-related unwinding issues by adding unwind
hints to the head code.

Reported-by: Jiri Slaby <jslaby@suse.cz>
Tested-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/78ef000a2f68f545d6eef44ee912edceaad82ccf.1505764066.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 2704fbb672d0d9a19414907fda7949283dcef6a1)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b63a868e404e64172afefea553c6a40963a151db)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/kernel/Makefile  |  1 -
 arch/x86/kernel/head_64.S | 14 ++++++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 287eac7d207f..e2315aecc441 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,6 @@ KASAN_SANITIZE_dumpstack.o := n
 KASAN_SANITIZE_dumpstack_$(BITS).o := n
 KASAN_SANITIZE_stacktrace.o := n

-OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
 OBJECT_FILES_NON_STANDARD_test_nx.o := y
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 45b18b1a6417..d081bc7a027d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -49,6 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
         .code64
         .globl startup_64
 startup_64:
+        UNWIND_HINT_EMPTY
         /*
          * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
          * and someone has loaded an identity mapped page table
@@ -81,6 +82,7 @@ startup_64:
         movq    $(early_top_pgt - __START_KERNEL_map), %rax
         jmp 1f
 ENTRY(secondary_startup_64)
+        UNWIND_HINT_EMPTY
         /*
          * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
          * and someone has loaded a mapped page table.
@@ -116,6 +118,7 @@ ENTRY(secondary_startup_64)
         movq    $1f, %rax
         jmp     *%rax
 1:
+        UNWIND_HINT_EMPTY

         /* Check if nx is implemented */
         movl    $0x80000001, %eax
@@ -230,6 +233,7 @@ END(secondary_startup_64)
  */
 ENTRY(start_cpu0)
         movq    initial_stack(%rip), %rsp
+        UNWIND_HINT_EMPTY
         jmp     .Ljump_to_C_code
 ENDPROC(start_cpu0)
 #endif
@@ -254,13 +258,18 @@ ENTRY(early_idt_handler_array)
         i = 0
         .rept NUM_EXCEPTION_VECTORS
         .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
-        pushq $0        # Dummy error code, to make stack frame uniform
+                UNWIND_HINT_IRET_REGS
+                pushq $0        # Dummy error code, to make stack frame uniform
+        .else
+                UNWIND_HINT_IRET_REGS offset=8
         .endif
         pushq $i                # 72(%rsp) Vector number
         jmp early_idt_handler_common
+        UNWIND_HINT_IRET_REGS
         i = i + 1
         .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
         .endr
+        UNWIND_HINT_IRET_REGS offset=16
 END(early_idt_handler_array)

 early_idt_handler_common:
@@ -289,6 +298,7 @@ early_idt_handler_common:
         pushq %r13              /* pt_regs->r13 */
         pushq %r14              /* pt_regs->r14 */
         pushq %r15              /* pt_regs->r15 */
+        UNWIND_HINT_REGS

         cmpq $14,%rsi           /* Page fault? */
         jnz 10f
@@ -411,7 +421,7 @@ ENTRY(phys_base)
 EXPORT_SYMBOL(phys_base)

 #include "../../x86/xen/xen-head.S"
-
+
         __PAGE_ALIGNED_BSS
 NEXT_PAGE(empty_zero_page)
         .skip PAGE_SIZE
--
2.14.2
@ -0,0 +1,43 @@
From 012bd636105426b93026d594261663e8a728dcc1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 25 Sep 2017 02:06:19 -0600
Subject: [PATCH 060/231] ACPI / APEI: adjust a local variable type in
 ghes_ioremap_pfn_irq()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Match up with what 7edda0886b ("acpi: apei: handle SEA notification
type for ARMv8") did for ghes_ioremap_pfn_nmi().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 095f613c6b386a1704b73a549e9ba66c1d5381ae)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0a5c092882b0ead111dc3a6bbaa870665b54d796)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 drivers/acpi/apei/ghes.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index d661d452b238..3628078ee351 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -174,7 +174,8 @@ static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)

 static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
 {
-        unsigned long vaddr, paddr;
+        unsigned long vaddr;
+        phys_addr_t paddr;
         pgprot_t prot;

         vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
--
2.14.2
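A side note on the type change above (a sketch, not part of the patch):
where phys_addr_t is 64-bit but unsigned long is 32-bit (e.g. a 32-bit
kernel with CONFIG_PHYS_ADDR_T_64BIT), deriving a physical address from
a PFN in an unsigned long can silently truncate. A standalone userspace
illustration of the effect, with invented example values:

        #include <stdint.h>
        #include <stdio.h>

        typedef uint64_t phys_addr_t;   /* mirrors CONFIG_PHYS_ADDR_T_64BIT */

        int main(void)
        {
                uint64_t pfn = 0x12345678ULL;            /* a PFN beyond 4 GiB of RAM */
                uint32_t narrow = (uint32_t)(pfn << 12); /* 32-bit 'unsigned long' result */
                phys_addr_t wide = (phys_addr_t)pfn << 12;

                printf("truncated: %#x\nfull: %#llx\n",
                       narrow, (unsigned long long)wide);
                return 0;
        }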
@ -0,0 +1,44 @@
From a405dd2b4172c310101f96c2152598bc24e9e6f8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 12 Oct 2017 09:24:30 +0200
Subject: [PATCH 061/231] x86/unwinder: Make CONFIG_UNWINDER_ORC=y the default
 in the 64-bit defconfig
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Increase testing coverage by turning on the primary x86 unwinder for
the 64-bit defconfig.

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 1e4078f0bba46ad61b69548abe6a6faf63b89380)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit ebcba768c005dce435721f6c998e3afdf5534666)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/configs/x86_64_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4a4b16e56d35..eb65c248708d 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
+CONFIG_ORC_UNWINDER=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
--
2.14.2
@ -0,0 +1,66 @@
From bc21c74b922871588bf6626bff34fa084ed60d71 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 12 Oct 2017 18:06:19 -0400
Subject: [PATCH 062/231] x86/fpu/debug: Remove unused 'x86_fpu_state' and
 'x86_fpu_deactivate_state' tracepoints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Commit:

  d1898b733619 ("x86/fpu: Add tracepoints to dump FPU state at key points")

... added the 'x86_fpu_state' and 'x86_fpu_deactivate_state' trace points,
but never used them. Today they are still not used. As they take up
and waste memory, remove them.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20171012180619.670b68b6@gandalf.local.home
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 127a1bea40f7f2a36bc7207ea4d51bb6b4e936fa)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit c7c367ddb6ffb6af2cfee287960e97c4aefc6548)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/trace/fpu.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 342e59789fcd..fed7d9ecae60 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -36,11 +36,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
         )
 );

-DEFINE_EVENT(x86_fpu, x86_fpu_state,
-        TP_PROTO(struct fpu *fpu),
-        TP_ARGS(fpu)
-);
-
 DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
         TP_PROTO(struct fpu *fpu),
         TP_ARGS(fpu)
@@ -76,11 +71,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
         TP_ARGS(fpu)
 );

-DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
-        TP_PROTO(struct fpu *fpu),
-        TP_ARGS(fpu)
-);
-
 DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
         TP_PROTO(struct fpu *fpu),
         TP_ARGS(fpu)
--
2.14.2
@ -0,0 +1,273 @@
From dcc61cf4d482d478979471795935733845fe757e Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 13 Oct 2017 15:02:00 -0500
Subject: [PATCH 063/231] x86/unwind: Rename unwinder config options to
 'CONFIG_UNWINDER_*'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Rename the unwinder config options from:

  CONFIG_ORC_UNWINDER
  CONFIG_FRAME_POINTER_UNWINDER
  CONFIG_GUESS_UNWINDER

to:

  CONFIG_UNWINDER_ORC
  CONFIG_UNWINDER_FRAME_POINTER
  CONFIG_UNWINDER_GUESS

... in order to give them a more logical config namespace.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/73972fc7e2762e91912c6b9584582703d6f1b8cc.1507924831.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 11af847446ed0d131cf24d16a7ef3d5ea7a49554)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 27ab2a240a797b073ce63385b1d5db06e44fc3ae)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 Documentation/x86/orc-unwinder.txt |  2 +-
 Makefile                           |  4 ++--
 arch/x86/kernel/Makefile           |  6 +++---
 scripts/Makefile.build             |  2 +-
 arch/x86/include/asm/module.h      |  2 +-
 arch/x86/include/asm/unwind.h      |  8 ++++----
 include/asm-generic/vmlinux.lds.h  |  2 +-
 arch/x86/Kconfig                   |  2 +-
 arch/x86/Kconfig.debug             | 10 +++++-----
 arch/x86/configs/tiny.config       |  4 ++--
 arch/x86/configs/x86_64_defconfig  |  2 +-
 lib/Kconfig.debug                  |  2 +-
 12 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
index af0c9a4c65a6..cd4b29be29af 100644
--- a/Documentation/x86/orc-unwinder.txt
+++ b/Documentation/x86/orc-unwinder.txt
@@ -4,7 +4,7 @@ ORC unwinder
 Overview
 --------

-The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
 similar in concept to a DWARF unwinder. The difference is that the
 format of the ORC data is much simpler than DWARF, which in turn allows
 the ORC unwinder to be much simpler and faster.
diff --git a/Makefile b/Makefile
index 490ce18685ea..b740e3dc9ff8 100644
--- a/Makefile
+++ b/Makefile
@@ -965,8 +965,8 @@ ifdef CONFIG_STACK_VALIDATION
   ifeq ($(has_libelf),1)
     objtool_target := tools/objtool FORCE
   else
-    ifdef CONFIG_ORC_UNWINDER
-      $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+    ifdef CONFIG_UNWINDER_ORC
+      $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
     else
       $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
     endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e2315aecc441..5bf0d5a473b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -125,9 +125,9 @@ obj-$(CONFIG_PERF_EVENTS)               += perf_regs.o
 obj-$(CONFIG_TRACING)                   += tracepoint.o
 obj-$(CONFIG_SCHED_MC_PRIO)             += itmt.o

-obj-$(CONFIG_ORC_UNWINDER)              += unwind_orc.o
-obj-$(CONFIG_FRAME_POINTER_UNWINDER)    += unwind_frame.o
-obj-$(CONFIG_GUESS_UNWINDER)            += unwind_guess.o
+obj-$(CONFIG_UNWINDER_ORC)              += unwind_orc.o
+obj-$(CONFIG_UNWINDER_FRAME_POINTER)    += unwind_frame.o
+obj-$(CONFIG_UNWINDER_GUESS)            += unwind_guess.o

 ###
 # 64 bit specific files
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index ab2c8ef43cdb..436005392047 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -258,7 +258,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)

 __objtool_obj := $(objtree)/tools/objtool/objtool

-objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
+objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)

 ifndef CONFIG_FRAME_POINTER
 objtool_args += --no-fp
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 9eb7c718aaf8..9f05a1002aa9 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,7 +5,7 @@
 #include <asm/orc_types.h>

 struct mod_arch_specific {
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
         unsigned int num_orcs;
         int *orc_unwind_ip;
         struct orc_entry *orc_unwind;
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9f793e2df7a..35d67dc7b69f 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,11 +12,11 @@ struct unwind_state {
         struct task_struct *task;
         int graph_idx;
         bool error;
-#if defined(CONFIG_ORC_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC)
         bool signal, full_regs;
         unsigned long sp, bp, ip;
         struct pt_regs *regs;
-#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
+#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
         bool got_irq;
         unsigned long *bp, *orig_sp, ip;
         struct pt_regs *regs;
@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
         __unwind_start(state, task, regs, first_frame);
 }

-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 {
         if (unwind_done(state))
@@ -65,7 +65,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 }
 #endif

-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
 void unwind_init(void);
 void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
                         void *orc, size_t orc_size);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 9fdb54a95976..e71e42432360 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -686,7 +686,7 @@
 #define BUG_TABLE
 #endif

-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
 #define ORC_UNWIND_TABLE                                                \
         . = ALIGN(4);                                                   \
         .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) {       \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3a0b8cb57caf..bf9f03740c30 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -168,7 +168,7 @@ config X86
         select HAVE_PERF_REGS
         select HAVE_PERF_USER_STACK_DUMP
         select HAVE_REGS_AND_STACK_ACCESS_API
-        select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
+        select HAVE_RELIABLE_STACKTRACE         if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
         select HAVE_STACK_VALIDATION            if X86_64
         select HAVE_SYSCALL_TRACEPOINTS
         select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c441b5d65ec8..5435a943f894 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -358,13 +358,13 @@ config PUNIT_ATOM_DEBUG

 choice
         prompt "Choose kernel unwinder"
-        default FRAME_POINTER_UNWINDER
+        default UNWINDER_FRAME_POINTER
         ---help---
           This determines which method will be used for unwinding kernel stack
           traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
           livepatch, lockdep, and more.

-config FRAME_POINTER_UNWINDER
+config UNWINDER_FRAME_POINTER
         bool "Frame pointer unwinder"
         select FRAME_POINTER
         ---help---
@@ -379,7 +379,7 @@ config FRAME_POINTER_UNWINDER
           consistency model, as this is currently the only way to get a
           reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).

-config ORC_UNWINDER
+config UNWINDER_ORC
         bool "ORC unwinder"
         depends on X86_64
         select STACK_VALIDATION
@@ -396,7 +396,7 @@ config ORC_UNWINDER
           Enabling this option will increase the kernel's runtime memory usage
           by roughly 2-4MB, depending on your kernel config.

-config GUESS_UNWINDER
+config UNWINDER_GUESS
         bool "Guess unwinder"
         depends on EXPERT
         ---help---
@@ -411,7 +411,7 @@ config GUESS_UNWINDER
 endchoice

 config FRAME_POINTER
-        depends on !ORC_UNWINDER && !GUESS_UNWINDER
+        depends on !UNWINDER_ORC && !UNWINDER_GUESS
         bool

 endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 550cd5012b73..66c9e2aab16c 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,5 +1,5 @@
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
 # CONFIG_HIGHMEM64G is not set
-CONFIG_GUESS_UNWINDER=y
-# CONFIG_FRAME_POINTER_UNWINDER is not set
+CONFIG_UNWINDER_GUESS=y
+# CONFIG_UNWINDER_FRAME_POINTER is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index eb65c248708d..e32fc1f274d8 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -299,7 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
-CONFIG_ORC_UNWINDER=y
+CONFIG_UNWINDER_ORC=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b4d1b3880b0..4f6ca5f60f7e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -375,7 +375,7 @@ config STACK_VALIDATION
           that runtime stack traces are more reliable.

           This is also a prerequisite for generation of ORC unwind data, which
-          is needed for CONFIG_ORC_UNWINDER.
+          is needed for CONFIG_UNWINDER_ORC.

           For more information, see
           tools/objtool/Documentation/stack-validation.txt.
--
2.14.2
@ -0,0 +1,90 @@
From a8ec58033a185db5d8c180d3508d34b8ae3a1c89 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 13 Oct 2017 15:02:01 -0500
Subject: [PATCH 064/231] x86/unwind: Make CONFIG_UNWINDER_ORC=y the default in
 kconfig for 64-bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The ORC unwinder has been stable in testing so far. Give it much wider
testing by making it the default in kconfig for x86_64. It's not yet
supported for 32-bit, so leave frame pointers as the default there.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/9b1237bbe7244ed9cdf8db2dcb1253e37e1c341e.1507924831.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit fc72ae40e30327aa24eb88a24b9c7058f938bd36)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit aff8d5169f46ae6ac0eb26a5ba745aaf9afa0704)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/Kconfig.debug | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 5435a943f894..7d88e9878a75 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -358,27 +358,13 @@ config PUNIT_ATOM_DEBUG

 choice
         prompt "Choose kernel unwinder"
-        default UNWINDER_FRAME_POINTER
+        default UNWINDER_ORC if X86_64
+        default UNWINDER_FRAME_POINTER if X86_32
         ---help---
           This determines which method will be used for unwinding kernel stack
           traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
           livepatch, lockdep, and more.

-config UNWINDER_FRAME_POINTER
-        bool "Frame pointer unwinder"
-        select FRAME_POINTER
-        ---help---
-          This option enables the frame pointer unwinder for unwinding kernel
-          stack traces.
-
-          The unwinder itself is fast and it uses less RAM than the ORC
-          unwinder, but the kernel text size will grow by ~3% and the kernel's
-          overall performance will degrade by roughly 5-10%.
-
-          This option is recommended if you want to use the livepatch
-          consistency model, as this is currently the only way to get a
-          reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
 config UNWINDER_ORC
         bool "ORC unwinder"
         depends on X86_64
@@ -396,6 +382,21 @@ config UNWINDER_ORC
           Enabling this option will increase the kernel's runtime memory usage
           by roughly 2-4MB, depending on your kernel config.

+config UNWINDER_FRAME_POINTER
+        bool "Frame pointer unwinder"
+        select FRAME_POINTER
+        ---help---
+          This option enables the frame pointer unwinder for unwinding kernel
+          stack traces.
+
+          The unwinder itself is fast and it uses less RAM than the ORC
+          unwinder, but the kernel text size will grow by ~3% and the kernel's
+          overall performance will degrade by roughly 5-10%.
+
+          This option is recommended if you want to use the livepatch
+          consistency model, as this is currently the only way to get a
+          reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
 config UNWINDER_GUESS
         bool "Guess unwinder"
         depends on EXPERT
--
2.14.2
@ -0,0 +1,69 @@
From 2f76ec868c18486b60f1b76428339a2fa0c2e5d8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:41 -0700
Subject: [PATCH 065/231] bitops: Add clear/set_bit32() to linux/bitops.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add two simple wrappers around set_bit/clear_bit() that accept
the common case of a u32 array. This avoids writing
casts in all callers.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-2-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit cbe96375025e14fc76f9ed42ee5225120d7210f8)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 06d31c11519ca0e8f9b7cab857f442ef44dfc1b2)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
include/linux/bitops.h | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a83c822c35c2..eb257a96db6d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -226,6 +226,32 @@ static inline unsigned long __ffs64(u64 word)
return __ffs((unsigned long)word);
}

+/*
+ * clear_bit32 - Clear a bit in memory for u32 array
+ * @nr: Bit to clear
+ * @addr: u32 * address of bitmap
+ *
+ * Same as clear_bit, but avoids needing casts for u32 arrays.
+ */
+
+static __always_inline void clear_bit32(long nr, volatile u32 *addr)
+{
+ clear_bit(nr, (volatile unsigned long *)addr);
+}
+
+/*
+ * set_bit32 - Set a bit in memory for u32 array
+ * @nr: Bit to clear
+ * @addr: u32 * address of bitmap
+ *
+ * Same as set_bit, but avoids needing casts for u32 arrays.
+ */
+
+static __always_inline void set_bit32(long nr, volatile u32 *addr)
+{
+ set_bit(nr, (volatile unsigned long *)addr);
+}
+
#ifdef __KERNEL__

#ifndef set_mask_bits
--
2.14.2

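A quick illustration (an editor sketch, not part of the patch series; the variable names are hypothetical): the wrappers let callers that keep a bitmap as a u32 array drop the cast at every call site, which is exactly how the x86 capability words use them in the next patch.

    u32 caps[4];    /* some u32-based bitmap */

    /* before: the cast is open-coded at each call site */
    clear_bit(nr, (volatile unsigned long *)caps);

    /* after: the wrapper hides the cast */
    clear_bit32(nr, caps);
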
@ -0,0 +1,221 @@
From d637e8b6db21d282cfb1fd789ae60807cc87c867 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:42 -0700
Subject: [PATCH 066/231] x86/cpuid: Add generic table for CPUID dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Some CPUID features depend on other features. Currently it's
possible to clear dependent features, but not to clear the base features,
which can cause various interesting problems.

This patch implements a generic table to describe dependencies
between CPUID features, to be used by all code that clears
CPUID.

Some subsystems (like XSAVE) had their own implementation of this,
but it's better to do it all in a single place for everyone.

Then clear_cpu_cap and setup_clear_cpu_cap always look up
this table and clear all dependencies too.

This is intended to be a practical table: only for features
that make sense to clear. If someone for example clears FPU,
or other features that are essentially part of the required
base feature set, not much is going to work. Handling
that is right now out of scope. We're only handling
features which can be usefully cleared.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jonathan McDowell <noodles@earth.li>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-3-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 0b00de857a648dafe7020878c7a27cf776f5edf4)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 35672522f2fc9a2e116ed1766f190bc08ef5582a)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/Makefile | 1 +
arch/x86/include/asm/cpufeature.h | 9 ++-
arch/x86/include/asm/cpufeatures.h | 5 ++
arch/x86/kernel/cpu/cpuid-deps.c | 113 +++++++++++++++++++++++++++++++++++++
4 files changed, 123 insertions(+), 5 deletions(-)
create mode 100644 arch/x86/kernel/cpu/cpuid-deps.c

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index e17942c131c8..de260fae1017 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -22,6 +22,7 @@ obj-y += rdrand.o
obj-y += match.o
obj-y += bugs.o
obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
+obj-y += cpuid-deps.o

obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d59c15c3defd..225fd8374fae 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -125,11 +125,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)

#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
-#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
-#define setup_clear_cpu_cap(bit) do { \
- clear_cpu_cap(&boot_cpu_data, bit); \
- set_bit(bit, (unsigned long *)cpu_caps_cleared); \
-} while (0)
+
+extern void setup_clear_cpu_cap(unsigned int bit);
+extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
+
#define setup_force_cpu_cap(bit) do { \
set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5a28e8e55e36..f4e145c4b06f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -21,6 +21,11 @@
* this feature bit is not displayed in /proc/cpuinfo at all.
*/

+/*
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c
+ */
+
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 000000000000..e48eb7313120
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,113 @@
+/* Declare dependencies between CPUIDs */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cpufeature.h>
+
+struct cpuid_dep {
+ unsigned int feature;
+ unsigned int depends;
+};
+
+/*
+ * Table of CPUID features that depend on others.
+ *
+ * This only includes dependencies that can be usefully disabled, not
+ * features part of the base set (like FPU).
+ *
+ * Note this all is not __init / __initdata because it can be
+ * called from cpu hotplug. It shouldn't do anything in this case,
+ * but it's difficult to tell that to the init reference checker.
+ */
+const static struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
+ { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
+ { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM2, X86_FEATURE_XMM },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
+ { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
+ { X86_FEATURE_AES, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ {}
+};
+
+static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
+{
+ clear_bit32(bit, c->x86_capability);
+}
+
+static inline void __setup_clear_cpu_cap(unsigned int bit)
+{
+ clear_cpu_cap(&boot_cpu_data, bit);
+ set_bit32(bit, cpu_caps_cleared);
+}
+
+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ if (!c)
+ __setup_clear_cpu_cap(feature);
+ else
+ __clear_cpu_cap(c, feature);
+}
+
+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ bool changed;
+ DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8);
+ const struct cpuid_dep *d;
+
+ clear_feature(c, feature);
+
+ /* Collect all features to disable, handling dependencies */
+ memset(disable, 0, sizeof(disable));
+ __set_bit(feature, disable);
+
+ /* Loop until we get a stable state. */
+ do {
+ changed = false;
+ for (d = cpuid_deps; d->feature; d++) {
+ if (!test_bit(d->depends, disable))
+ continue;
+ if (__test_and_set_bit(d->feature, disable))
+ continue;
+
+ changed = true;
+ clear_feature(c, d->feature);
+ }
+ } while (changed);
+}
+
+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ do_clear_cpu_cap(c, feature);
+}
+
+void setup_clear_cpu_cap(unsigned int feature)
+{
+ do_clear_cpu_cap(NULL, feature);
+}
--
2.14.2

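A usage sketch (illustrative, not from the patch): with the table in place, clearing one base feature transitively clears its dependents, replacing the long hand-maintained lists this series removes from the XSAVE code a few patches later.

    /* A single call now cascades through cpuid_deps[]:
     * XSAVE   -> XSAVEOPT, XSAVEC, XSAVES, XGETBV1, PKU, MPX, AVX
     * AVX     -> FMA, AVX2, AVX512F
     * AVX512F -> AVX512BW, AVX512VL, AVX512DQ and the other AVX-512 bits
     */
    setup_clear_cpu_cap(X86_FEATURE_XSAVE);
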
@ -0,0 +1,97 @@
From df469cffe07c84906be43e89d33f2a8a5312e60f Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:43 -0700
Subject: [PATCH 067/231] x86/fpu: Parse clearcpuid= as early XSAVE argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

With a follow-on patch we want to make clearcpuid affect the XSAVE
configuration. But xsave is currently initialized before arguments
are parsed. Move the clearcpuid= parsing into the special
early xsave argument parsing code.

Since clearcpuid= contains a = we need to keep the old __setup
around as a dummy, otherwise it would end up as an environment
variable in init's environment.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-4-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 0c2a3913d6f50503f7c59d83a6219e39508cc898)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 27deb452eb0d27c406f3817ab057201aa8767abe)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/common.c | 16 +++++++---------
arch/x86/kernel/fpu/init.c | 11 +++++++++++
2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4be7b209a3d6..ef7b1ba56363 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1293,18 +1293,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(")\n");
}

-static __init int setup_disablecpuid(char *arg)
+/*
+ * clearcpuid= was already parsed in fpu__init_parse_early_param.
+ * But we need to keep a dummy __setup around otherwise it would
+ * show up as an environment variable for init.
+ */
+static __init int setup_clearcpuid(char *arg)
{
- int bit;
-
- if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
- setup_clear_cpu_cap(bit);
- else
- return 0;
-
return 1;
}
-__setup("clearcpuid=", setup_disablecpuid);
+__setup("clearcpuid=", setup_clearcpuid);

#ifdef CONFIG_X86_64
struct desc_ptr idt_descr __ro_after_init = {
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index d5d44c452624..07f0ab877f49 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
+ char arg[32];
+ char *argptr = arg;
+ int bit;
+
if (cmdline_find_option_bool(boot_command_line, "no387"))
setup_clear_cpu_cap(X86_FEATURE_FPU);

@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)

if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+ if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
+ sizeof(arg)) &&
+ get_option(&argptr, &bit) &&
+ bit >= 0 &&
+ bit < NCAPINTS * 32)
+ setup_clear_cpu_cap(bit);
}

/*
--
2.14.2

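A worked example of the option being rerouted here (bit numbering assumed from cpufeatures.h, where a feature lives at word*32+bit): X86_FEATURE_XSAVE is defined as (4*32+26), i.e. bit 154.

    # on the kernel command line, disable XSAVE for testing:
    clearcpuid=154
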
@ -0,0 +1,90 @@
From 1b88ea4170f72b4fed72e9235c88b6121f153b21 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:44 -0700
Subject: [PATCH 068/231] x86/fpu: Make XSAVE check the base CPUID features
 before enabling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Before enabling XSAVE, not only check the XSAVE specific CPUID bits,
but also the base CPUID features of the respective XSAVE feature.
This allows disabling individual XSAVE states using the existing
clearcpuid= option, which can be useful for performance testing
and debugging, and also in general avoids inconsistencies.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-5-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit ccb18db2ab9d923df07e7495123fe5fb02329713)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 2efda26f9ee0eeb9919772e90ca30dbe59008dc8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/fpu/xstate.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c24ac1efb12d..3abe85b08234 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
#include <asm/fpu/xstate.h>

#include <asm/tlbflush.h>
+#include <asm/cpufeature.h>

/*
* Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
"unknown xstate feature" ,
};

+static short xsave_cpuid_features[] __initdata = {
+ X86_FEATURE_FPU,
+ X86_FEATURE_XMM,
+ X86_FEATURE_AVX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_INTEL_PT,
+ X86_FEATURE_PKU,
+};
+
/*
* Mask of xstate features supported by the CPU and the kernel:
*/
@@ -702,6 +716,7 @@ void __init fpu__init_system_xstate(void)
unsigned int eax, ebx, ecx, edx;
static int on_boot_cpu __initdata = 1;
int err;
+ int i;

WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -735,6 +750,14 @@ void __init fpu__init_system_xstate(void)
goto out_disable;
}

+ /*
+ * Clear XSAVE features that are disabled in the normal CPUID.
+ */
+ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+ if (!boot_cpu_has(xsave_cpuid_features[i]))
+ xfeatures_mask &= ~BIT(i);
+ }
+
xfeatures_mask &= fpu__get_supported_xfeatures_mask();

/* Enable xstate instructions to be able to continue with initialization: */
--
2.14.2

@ -0,0 +1,70 @@
From 9b2405a12593b1ba7894cf249ddeada92a658463 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 13 Oct 2017 14:56:45 -0700
Subject: [PATCH 069/231] x86/fpu: Remove the explicit clearing of XSAVE
 dependent features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Clearing a CPU feature with setup_clear_cpu_cap() clears all features
which depend on it. Expressing feature dependencies in one place is
easier to maintain than keeping functions like
fpu__xstate_clear_all_cpu_caps() up to date.

The features which depend on XSAVE have their dependency expressed in the
dependency table, so it's sufficient to clear X86_FEATURE_XSAVE.

Remove the explicit clearing of XSAVE dependent features.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171013215645.23166-6-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 73e3a7d2a7c3be29a5a22b85026f6cfa5664267f)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit af445f9ba8bb30b47ccb5247b8f5ba28c9f2be3e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/fpu/xstate.c | 20 --------------------
1 file changed, 20 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 3abe85b08234..fd6882c42246 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -73,26 +73,6 @@ unsigned int fpu_user_xstate_size;
void fpu__xstate_clear_all_cpu_caps(void)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
- setup_clear_cpu_cap(X86_FEATURE_AVX);
- setup_clear_cpu_cap(X86_FEATURE_AVX2);
- setup_clear_cpu_cap(X86_FEATURE_AVX512F);
- setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
- setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
- setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
- setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
- setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
- setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
- setup_clear_cpu_cap(X86_FEATURE_PKU);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
}

/*
--
2.14.2

@ -0,0 +1,57 @@
From 6d96a02c961d41d82738bce9806c430d99acc9f8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 16 Oct 2017 16:22:31 -0700
Subject: [PATCH 070/231] x86/platform/UV: Convert timers to use timer_setup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Dimitri Sivanich <sivanich@hpe.com>
Cc: Russ Anderson <rja@hpe.com>
Cc: Mike Travis <mike.travis@hpe.com>
Link: https://lkml.kernel.org/r/20171016232231.GA100493@beast

(cherry picked from commit 376f3bcebdc999cc737d9052109cc33b573b3a8b)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 869cbd2b31024e70d574527b8c6851bf2ebbe483)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0d57bb9079c9..c0b694810ff4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -920,9 +920,8 @@ static __init void uv_rtc_init(void)
/*
* percpu heartbeat timer
*/
-static void uv_heartbeat(unsigned long ignored)
+static void uv_heartbeat(struct timer_list *timer)
{
- struct timer_list *timer = &uv_scir_info->timer;
unsigned char bits = uv_scir_info->state;

/* Flip heartbeat bit: */
@@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;

uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
- setup_pinned_timer(timer, uv_heartbeat, cpu);
+ timer_setup(timer, uv_heartbeat, TIMER_PINNED);
timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
add_timer_on(timer, cpu);
uv_cpu_scir_info(cpu)->enabled = 1;
--
2.14.2

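For context, a minimal kernel-style sketch of the conversion pattern (a hypothetical driver, not code from this patch; the UV heartbeat above passes TIMER_PINNED and reads per-cpu state instead of calling from_timer()):

    #include <linux/timer.h>

    struct my_dev {
        struct timer_list timer;
    };

    /* old style: the callback receives an opaque unsigned long cookie */
    static void my_timeout_old(unsigned long data)
    {
        struct my_dev *dev = (struct my_dev *)data;

        mod_timer(&dev->timer, jiffies + HZ);   /* re-arm in one second */
    }
    /* armed with: setup_timer(&dev->timer, my_timeout_old, (unsigned long)dev); */

    /* new style: the callback receives the timer_list pointer and
     * recovers its container with from_timer() */
    static void my_timeout_new(struct timer_list *t)
    {
        struct my_dev *dev = from_timer(dev, t, timer);

        mod_timer(&dev->timer, jiffies + HZ);   /* re-arm in one second */
    }
    /* armed with: timer_setup(&dev->timer, my_timeout_new, 0); */
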
@ -0,0 +1,73 @@
From ca358ca3d22248f099a09d65ee25410cf3beebc5 Mon Sep 17 00:00:00 2001
From: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Date: Sat, 14 Oct 2017 20:17:54 +0530
Subject: [PATCH 071/231] objtool: Print top level commands on incorrect usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Print the top-level objtool commands, along with the error, on incorrect
command line usage. The objtool command line parser exits with code 129
on incorrect usage. Convert the cmd_usage() exit code as well, to maintain
consistency across objtool.

After the patch:

$ ./objtool -j

Unknown option: -j

usage: objtool COMMAND [ARGS]

Commands:
check Perform stack metadata validation on an object file
orc Generate in-place ORC unwind tables for an object file

$ echo $?
129

Signed-off-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1507992474-16142-1-git-send-email-kamalesh@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 6a93bb7e4a7d6670677d5b0eb980936eb9cc5d2e)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit cd75c9c55a5f288e1d3f20c48c5c4c2caf3966e8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
tools/objtool/objtool.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 31e0f9143840..07f329919828 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -70,7 +70,7 @@ static void cmd_usage(void)

printf("\n");

- exit(1);
+ exit(129);
}

static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
break;
} else {
fprintf(stderr, "Unknown option: %s\n", cmd);
- fprintf(stderr, "\n Usage: %s\n",
- objtool_usage_string);
- exit(1);
+ cmd_usage();
}

(*argv)++;
--
2.14.2

@ -0,0 +1,65 @@
From a827c0ac43c2dc1e5e0528ebd4b2ca2d74534e18 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 Oct 2017 19:39:35 +0200
Subject: [PATCH 072/231] x86/cpuid: Prevent out of bound access in
 do_clear_cpu_cap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

do_clear_cpu_cap() allocates a bitmap to keep track of disabled feature
dependencies. That bitmap is sized NCAPINTS * 32 bits. The possible
'features' which can be handed in are larger than this, because after the
capabilities the bug 'feature' bits occupy another 32 bits. Not really
obvious...

So clearing any of the misfeature bits, as the 32-bit kernel does for the
F00F bug, accesses that bitmap out of bounds, thereby corrupting the stack.

Size the bitmap properly and add a sanity check to catch accidental
out-of-bounds access.

Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies")
Reported-by: kernel test robot <xiaolong.ye@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Link: https://lkml.kernel.org/r/20171018022023.GA12058@yexl-desktop
(cherry picked from commit 57b8b1a1856adaa849d02d547411a553a531022b)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 4b3a90bd20b35a97fd9ca6f6a71131f4417782e4)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/cpu/cpuid-deps.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index e48eb7313120..c1d49842a411 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -75,11 +75,17 @@ static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
__clear_cpu_cap(c, feature);
}

+/* Take the capabilities and the BUG bits into account */
+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
+
static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
{
- bool changed;
- DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8);
+ DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
const struct cpuid_dep *d;
+ bool changed;
+
+ if (WARN_ON(feature >= MAX_FEATURE_BITS))
+ return;

clear_feature(c, feature);

--
2.14.2

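To put numbers on the overrun (word counts assumed from the 4.13-era cpufeatures.h, where NCAPINTS is 18 capability words and NBUGINTS is 1 bug word):

    /* old bound:  NCAPINTS * 32             = 576 bits (bits 0..575)
     * bug bits:   X86_BUG(x) = NCAPINTS*32 + x, so X86_BUG_F00F is bit 576
     * new bound: (NCAPINTS + NBUGINTS) * 32 = 608 bits, covering both ranges
     */
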
@ -0,0 +1,125 @@
From 1e3688f9e76b3d8b218ed1afa292585a91b0b0c6 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Wed, 18 Oct 2017 10:21:07 -0700
Subject: [PATCH 073/231] x86/entry: Use SYSCALL_DEFINE() macros for
 sys_modify_ldt()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

We do not have tracepoints for sys_modify_ldt() because we define
it directly instead of using the normal SYSCALL_DEFINEx() macros.

However, there is a reason sys_modify_ldt() does not use the macros:
it has an 'int' return type instead of 'unsigned long'. This is
a bug, but it's a bug cemented in the ABI.

What does this mean? If we return -EINVAL from a function that
returns 'int', we have 0x00000000ffffffea in %rax. But, if we
return -EINVAL from a function returning 'unsigned long', we end
up with 0xffffffffffffffea in %rax, which is wrong.

To work around this and maintain the 'int' behavior while using
the SYSCALL_DEFINEx() macros, we add a cast to 'unsigned int'
in both implementations of sys_modify_ldt().

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Brian Gerst <brgerst@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20171018172107.1A79C532@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit da20ab35180780e4a6eadc804544f1fa967f3567)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d865f635f4b2c3307e79de9be5c49ea8bd4c43a6)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/syscalls.h | 2 +-
arch/x86/kernel/ldt.c | 16 +++++++++++++---
arch/x86/um/ldt.c | 7 +++++--
3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcafe27a6..bad25bb80679 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
asmlinkage long sys_iopl(unsigned int);

/* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);

/* kernel/signal.c */
asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index f0e64db18ac8..0402d44deb4d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
@@ -294,8 +295,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
return error;
}

-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
- unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
int ret = -ENOSYS;

@@ -313,5 +314,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
ret = write_ldt(ptr, bytecount, 0);
break;
}
- return ret;
+ /*
+ * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+ * return type, but tht ABI for sys_modify_ldt() expects
+ * 'int'. This cast gives us an int-sized value in %rax
+ * for the return code. The 'unsigned' is necessary so
+ * the compiler does not try to sign-extend the negative
+ * return codes into the high half of the register when
+ * taking the value from int->long.
+ */
+ return (unsigned int)ret;
}
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb5df43..3ee234b6234d 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <os.h>
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
mm->arch.ldt.entry_count = 0;
}

-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
- return do_modify_ldt_skas(func, ptr, bytecount);
+ /* See non-um modify_ldt() for why we do this cast */
+ return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
}
--
2.14.2

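The %rax values quoted above can be reproduced in plain userspace C. A minimal sketch (assumes an LP64 target where 'long' is 64-bit; -22 stands in for -EINVAL):

    #include <stdio.h>

    static long ret_plain(int err)  { return err; }               /* sign-extends */
    static long ret_casted(int err) { return (unsigned int)err; } /* zero-extends */

    int main(void)
    {
        printf("plain long return:   %#lx\n", ret_plain(-22));  /* 0xffffffffffffffea */
        printf("(unsigned int) cast: %#lx\n", ret_casted(-22)); /* 0xffffffea */
        return 0;
    }
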
@ -0,0 +1,141 @@
From cc87e9d44044fb3ae4145d6ad9574697439b03bf Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 29 Sep 2017 17:08:16 +0300
Subject: [PATCH 074/231] mm/sparsemem: Allocate mem_section at runtime for
 CONFIG_SPARSEMEM_EXTREME=y
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The size of the mem_section[] array depends on the size of the physical address space.

In preparation for boot-time switching between paging modes on x86-64
we need to make the allocation of mem_section[] dynamic, because otherwise
we waste a lot of RAM: with CONFIG_NODE_SHIFT=10, mem_section[] size is 32kB
for 4-level paging and 2MB for 5-level paging mode.

The patch allocates the array on the first call to sparse_memory_present_with_active_regions().

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-2-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 83e3c48729d9ebb7af5a31a504f3fd6aff0348c4)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit c70f71e01a0ae5d884abae0424618abe90b82011)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
include/linux/mmzone.h | 6 +++++-
mm/page_alloc.c | 10 ++++++++++
mm/sparse.c | 17 +++++++++++------
3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fc14b8b3f6ce..9c6c001a8c6c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1137,13 +1137,17 @@ struct mem_section {
#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)

#ifdef CONFIG_SPARSEMEM_EXTREME
-extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+extern struct mem_section **mem_section;
#else
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

static inline struct mem_section *__nr_to_section(unsigned long nr)
{
+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (!mem_section)
+ return NULL;
+#endif
if (!mem_section[SECTION_NR_TO_ROOT(nr)])
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1423da8dd16f..66eb23ab658d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5707,6 +5707,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
unsigned long start_pfn, end_pfn;
int i, this_nid;

+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (!mem_section) {
+ unsigned long size, align;
+
+ size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+ align = 1 << (INTERNODE_CACHE_SHIFT);
+ mem_section = memblock_virt_alloc(size, align);
+ }
+#endif
+
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
memory_present(this_nid, start_pfn, end_pfn);
}
diff --git a/mm/sparse.c b/mm/sparse.c
index cdce7a7bb3f3..308a0789d1bb 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -22,8 +22,7 @@
* 1) mem_section - memory sections, mem_map's for valid memory
*/
#ifdef CONFIG_SPARSEMEM_EXTREME
-struct mem_section *mem_section[NR_SECTION_ROOTS]
- ____cacheline_internodealigned_in_smp;
+struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
____cacheline_internodealigned_in_smp;
@@ -104,7 +103,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
int __section_nr(struct mem_section* ms)
{
unsigned long root_nr;
- struct mem_section* root;
+ struct mem_section *root = NULL;

for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -115,7 +114,7 @@ int __section_nr(struct mem_section* ms)
break;
}

- VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+ VM_BUG_ON(!root);

return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
@@ -333,11 +332,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
unsigned long usemap_snr, pgdat_snr;
- static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
- static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+ static unsigned long old_usemap_snr;
+ static unsigned long old_pgdat_snr;
struct pglist_data *pgdat = NODE_DATA(nid);
int usemap_nid;

+ /* First call */
+ if (!old_usemap_snr) {
+ old_usemap_snr = NR_MEM_SECTIONS;
+ old_pgdat_snr = NR_MEM_SECTIONS;
+ }
+
usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
if (usemap_snr == pgdat_snr)
--
2.14.2

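The 32kB and 2MB figures follow from the sparsemem geometry (constants assumed from the x86-64 defaults of this kernel: SECTION_SIZE_BITS = 27, 128 sections per root, 8-byte root pointers):

    /* 4-level: MAX_PHYSMEM_BITS = 46 -> 2^(46-27) = 512K sections
     *          512K sections / 128 per root = 4096 roots * 8 bytes = 32 KB
     * 5-level: MAX_PHYSMEM_BITS = 52 -> 2^(52-27) = 32M sections
     *          32M sections / 128 per root = 256K roots * 8 bytes = 2 MB
     */
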
@ -0,0 +1,244 @@
From f6bb8e560b2229af5dcf3127fc92e732539b4823 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Fri, 29 Sep 2017 17:08:18 +0300
Subject: [PATCH 075/231] x86/kasan: Use the same shadow offset for 4- and
 5-level paging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

We are going to support boot-time switching between 4- and 5-level
paging. For KASAN it means we cannot have different KASAN_SHADOW_OFFSET
for different paging modes: the constant is passed to gcc to generate
code and cannot be changed at runtime.

This patch changes KASAN code to use 0xdffffc0000000000 as shadow offset
for both 4- and 5-level paging.

For 5-level paging it means that the shadow memory region is not aligned to
a PGD boundary anymore and we have to handle unaligned parts of the region
properly.

In addition, we have to exclude paravirt code from KASAN instrumentation
as we now use set_pgd() before KASAN is fully ready.

[kirill.shutemov@linux.intel.com: cleanup, changelog message]
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-4-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 12a8cc7fcf54a8575f094be1e99032ec38aa045c)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 2ce428150e002623aa0ed2a1ab840fde5f860f32)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
Documentation/x86/x86_64/mm.txt | 2 +-
arch/x86/kernel/Makefile | 3 +-
arch/x86/mm/kasan_init_64.c | 101 +++++++++++++++++++++++++++++++---------
arch/x86/Kconfig | 1 -
4 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index b0798e281aa6..3448e675b462 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5bf0d5a473b4..aa059806201d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,7 +24,8 @@ endif
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
-KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_paravirt.o := n

OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 02c9d7553409..464089f33e80 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,8 @@
extern pgd_t early_top_pgt[PTRS_PER_PGD];
extern struct range pfn_mapped[E820_MAX_ENTRIES];

+static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
static int __init map_range(struct range *range)
{
unsigned long start;
@@ -30,8 +32,10 @@ static void __init clear_pgds(unsigned long start,
unsigned long end)
{
pgd_t *pgd;
+ /* See comment in kasan_init() */
+ unsigned long pgd_end = end & PGDIR_MASK;

- for (; start < end; start += PGDIR_SIZE) {
+ for (; start < pgd_end; start += PGDIR_SIZE) {
pgd = pgd_offset_k(start);
/*
* With folded p4d, pgd_clear() is nop, use p4d_clear()
@@ -42,29 +46,61 @@ static void __init clear_pgds(unsigned long start,
else
pgd_clear(pgd);
}
+
+ pgd = pgd_offset_k(start);
+ for (; start < end; start += P4D_SIZE)
+ p4d_clear(p4d_offset(pgd, start));
+}
+
+static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
+{
+ unsigned long p4d;
+
+ if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ return (p4d_t *)pgd;
+
+ p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+ p4d += __START_KERNEL_map - phys_base;
+ return (p4d_t *)p4d + p4d_index(addr);
+}
+
+static void __init kasan_early_p4d_populate(pgd_t *pgd,
+ unsigned long addr,
+ unsigned long end)
+{
+ pgd_t pgd_entry;
+ p4d_t *p4d, p4d_entry;
+ unsigned long next;
+
+ if (pgd_none(*pgd)) {
+ pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
+ set_pgd(pgd, pgd_entry);
+ }
+
+ p4d = early_p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+
+ if (!p4d_none(*p4d))
+ continue;
+
+ p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
+ set_p4d(p4d, p4d_entry);
+ } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
}

static void __init kasan_map_early_shadow(pgd_t *pgd)
{
- int i;
- unsigned long start = KASAN_SHADOW_START;
+ /* See comment in kasan_init() */
+ unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;

- for (i = pgd_index(start); start < end; i++) {
- switch (CONFIG_PGTABLE_LEVELS) {
- case 4:
- pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
- _KERNPG_TABLE);
- break;
- case 5:
- pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
- _KERNPG_TABLE);
- break;
- default:
- BUILD_BUG();
- }
- start += PGDIR_SIZE;
- }
+ pgd += pgd_index(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_early_p4d_populate(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
}

#ifdef CONFIG_KASAN_INLINE
@@ -101,7 +137,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);

- for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
+ for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);

kasan_map_early_shadow(early_top_pgt);
@@ -117,12 +153,35 @@ void __init kasan_init(void)
#endif

memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
+
+ /*
+ * We use the same shadow offset for 4- and 5-level paging to
+ * facilitate boot-time switching between paging modes.
+ * As result in 5-level paging mode KASAN_SHADOW_START and
+ * KASAN_SHADOW_END are not aligned to PGD boundary.
+ *
+ * KASAN_SHADOW_START doesn't share PGD with anything else.
+ * We claim whole PGD entry to make things easier.
+ *
+ * KASAN_SHADOW_END lands in the last PGD entry and it collides with
+ * bunch of things like kernel code, modules, EFI mapping, etc.
+ * We need to take extra steps to not overwrite them.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ void *ptr;
+
+ ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+ memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
+ set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
+ __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
+ }
+
load_cr3(early_top_pgt);
__flush_tlb_all();

- clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+ clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);

- kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+ kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
kasan_mem_to_shadow((void *)PAGE_OFFSET));

for (i = 0; i < E820_MAX_ENTRIES; i++) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bf9f03740c30..67d07802ae95 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -300,7 +300,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config KASAN_SHADOW_OFFSET
hex
depends on KASAN
- default 0xdff8000000000000 if X86_5LEVEL
default 0xdffffc0000000000

config HAVE_INTEL_TXT
--
2.14.2

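For reference, the shadow mapping that this unified offset feeds, as a minimal model of the kernel's kasan_mem_to_shadow() (assumes the usual one-shadow-byte-per-8-bytes scaling):

    #define KASAN_SHADOW_SCALE_SHIFT 3
    #define KASAN_SHADOW_OFFSET 0xdffffc0000000000UL

    /* one shadow byte tracks 8 bytes of kernel address space */
    static inline unsigned long kasan_shadow_addr(unsigned long addr)
    {
        return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
    }

    /* e.g. kasan_shadow_addr(0xffff880000000000UL) == 0xffff0d0000000000UL */
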
@ -0,0 +1,80 @@
From cac8711a9ba742e97090cc5ec522360f1549c584 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 29 Sep 2017 17:08:19 +0300
Subject: [PATCH 076/231] x86/xen: Provide pre-built page tables only for
 CONFIG_XEN_PV=y and CONFIG_XEN_PVH=y
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Looks like we only need pre-built page tables in the CONFIG_XEN_PV=y and
CONFIG_XEN_PVH=y cases.

Let's not provide them for other configurations.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-5-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 4375c29985f155d7eb2346615d84e62d1b673682)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a883ee7f3c1dc64a8c946543ac598399353d1b03)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/kernel/head_64.S | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d081bc7a027d..12daaa0b187f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -37,11 +37,12 @@
*
*/

-#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
+#endif
L3_START_KERNEL = pud_index(__START_KERNEL_map)

.text
@@ -348,10 +349,7 @@ NEXT_PAGE(early_dynamic_pgts)

.data

-#ifndef CONFIG_XEN
-NEXT_PAGE(init_top_pgt)
- .fill 512,8,0
-#else
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
@@ -368,6 +366,9 @@ NEXT_PAGE(level2_ident_pgt)
* Don't set NX because code runs from these pages.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+#else
+NEXT_PAGE(init_top_pgt)
+ .fill 512,8,0
#endif

#ifdef CONFIG_X86_5LEVEL
--
2.14.2

@ -0,0 +1,316 @@
From ed422950e50aeb9a05920e7387b4dd7c8dc2fc67 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 29 Sep 2017 17:08:20 +0300
Subject: [PATCH 077/231] x86/xen: Drop 5-level paging support code from the
 XEN_PV code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

It was decided 5-level paging is not going to be supported in XEN_PV.

Let's drop the dead code from the XEN_PV code.

Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-6-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 773dd2fca581b0a80e5a33332cc8ee67e5a79cba)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 3fd0b7ef0094fd8bb3c8172d9b137ebe0d81ecbc)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/xen/mmu_pv.c | 159 +++++++++++++++++++-------------------------------
 1 file changed, 60 insertions(+), 99 deletions(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index ba76f3ce997f..45bb2d462e44 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -469,7 +469,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#ifdef CONFIG_X86_64
 __visible pudval_t xen_pud_val(pud_t pud)
 {
 	return pte_mfn_to_pfn(pud.pud);
@@ -558,7 +558,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+#endif /* CONFIG_X86_64 */
 
 static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
 		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -600,21 +600,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
 		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
 		bool last, unsigned long limit)
 {
-	int i, nr, flush = 0;
+	int flush = 0;
+	pud_t *pud;
 
-	nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
-	for (i = 0; i < nr; i++) {
-		pud_t *pud;
 
-		if (p4d_none(p4d[i]))
-			continue;
+	if (p4d_none(*p4d))
+		return flush;
 
-		pud = pud_offset(&p4d[i], 0);
-		if (PTRS_PER_PUD > 1)
-			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-		flush |= xen_pud_walk(mm, pud, func,
-				last && i == nr - 1, limit);
-	}
+	pud = pud_offset(p4d, 0);
+	if (PTRS_PER_PUD > 1)
+		flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+	flush |= xen_pud_walk(mm, pud, func, last, limit);
 	return flush;
 }
 
@@ -664,8 +660,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 			continue;
 
 		p4d = p4d_offset(&pgd[i], 0);
-		if (PTRS_PER_P4D > 1)
-			flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
 		flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
 	}
 
@@ -1196,22 +1190,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
-	unsigned int i;
 	bool unpin;
 
 	unpin = (vaddr == 2 * PGDIR_SIZE);
 	vaddr &= PMD_MASK;
 	pgd = pgd_offset_k(vaddr);
 	p4d = p4d_offset(pgd, 0);
-	for (i = 0; i < PTRS_PER_P4D; i++) {
-		if (p4d_none(p4d[i]))
-			continue;
-		xen_cleanmfnmap_p4d(p4d + i, unpin);
-	}
-	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-		set_pgd(pgd, __pgd(0));
-		xen_cleanmfnmap_free_pgtbl(p4d, unpin);
-	}
+	if (!p4d_none(*p4d))
+		xen_cleanmfnmap_p4d(p4d, unpin);
 }
 
 static void __init xen_pagetable_p2m_free(void)
@@ -1717,7 +1703,7 @@ static void xen_release_pmd(unsigned long pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2054,13 +2040,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
 */
 void __init xen_relocate_p2m(void)
 {
-	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
+	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
 	unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
-	int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
+	int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
 	pte_t *pt;
 	pmd_t *pmd;
 	pud_t *pud;
-	p4d_t *p4d = NULL;
 	pgd_t *pgd;
 	unsigned long *new_p2m;
 	int save_pud;
@@ -2070,11 +2055,7 @@ void __init xen_relocate_p2m(void)
 	n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
 	n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
 	n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
-	if (PTRS_PER_P4D > 1)
-		n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
-	else
-		n_p4d = 0;
-	n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
+	n_frames = n_pte + n_pt + n_pmd + n_pud;
 
 	new_area = xen_find_free_area(PFN_PHYS(n_frames));
 	if (!new_area) {
@@ -2090,76 +2071,56 @@ void __init xen_relocate_p2m(void)
 	* To avoid any possible virtual address collision, just use
 	* 2 * PUD_SIZE for the new area.
 	*/
-	p4d_phys = new_area;
-	pud_phys = p4d_phys + PFN_PHYS(n_p4d);
+	pud_phys = new_area;
 	pmd_phys = pud_phys + PFN_PHYS(n_pud);
 	pt_phys = pmd_phys + PFN_PHYS(n_pmd);
 	p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
 
 	pgd = __va(read_cr3_pa());
 	new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
-	idx_p4d = 0;
 	save_pud = n_pud;
-	do {
-		if (n_p4d > 0) {
-			p4d = early_memremap(p4d_phys, PAGE_SIZE);
-			clear_page(p4d);
-			n_pud = min(save_pud, PTRS_PER_P4D);
-		}
-		for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
-			pud = early_memremap(pud_phys, PAGE_SIZE);
-			clear_page(pud);
-			for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
-					idx_pmd++) {
-				pmd = early_memremap(pmd_phys, PAGE_SIZE);
-				clear_page(pmd);
-				for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
-						idx_pt++) {
-					pt = early_memremap(pt_phys, PAGE_SIZE);
-					clear_page(pt);
-					for (idx_pte = 0;
-							idx_pte < min(n_pte, PTRS_PER_PTE);
-							idx_pte++) {
-						set_pte(pt + idx_pte,
-								pfn_pte(p2m_pfn, PAGE_KERNEL));
-						p2m_pfn++;
-					}
-					n_pte -= PTRS_PER_PTE;
-					early_memunmap(pt, PAGE_SIZE);
-					make_lowmem_page_readonly(__va(pt_phys));
-					pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
-							PFN_DOWN(pt_phys));
-					set_pmd(pmd + idx_pt,
-							__pmd(_PAGE_TABLE | pt_phys));
-					pt_phys += PAGE_SIZE;
+	for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+		pud = early_memremap(pud_phys, PAGE_SIZE);
+		clear_page(pud);
+		for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+				idx_pmd++) {
+			pmd = early_memremap(pmd_phys, PAGE_SIZE);
+			clear_page(pmd);
+			for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+					idx_pt++) {
+				pt = early_memremap(pt_phys, PAGE_SIZE);
+				clear_page(pt);
+				for (idx_pte = 0;
+						idx_pte < min(n_pte, PTRS_PER_PTE);
+						idx_pte++) {
+					set_pte(pt + idx_pte,
+							pfn_pte(p2m_pfn, PAGE_KERNEL));
+					p2m_pfn++;
 				}
-				n_pt -= PTRS_PER_PMD;
-				early_memunmap(pmd, PAGE_SIZE);
-				make_lowmem_page_readonly(__va(pmd_phys));
-				pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
-						PFN_DOWN(pmd_phys));
-				set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
-				pmd_phys += PAGE_SIZE;
+				n_pte -= PTRS_PER_PTE;
+				early_memunmap(pt, PAGE_SIZE);
+				make_lowmem_page_readonly(__va(pt_phys));
+				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+						PFN_DOWN(pt_phys));
+				set_pmd(pmd + idx_pt,
+						__pmd(_PAGE_TABLE | pt_phys));
+				pt_phys += PAGE_SIZE;
 			}
-			n_pmd -= PTRS_PER_PUD;
-			early_memunmap(pud, PAGE_SIZE);
-			make_lowmem_page_readonly(__va(pud_phys));
-			pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
-			if (n_p4d > 0)
-				set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
-			else
-				set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
-			pud_phys += PAGE_SIZE;
-		}
-		if (n_p4d > 0) {
-			save_pud -= PTRS_PER_P4D;
-			early_memunmap(p4d, PAGE_SIZE);
-			make_lowmem_page_readonly(__va(p4d_phys));
-			pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
-			set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
-			p4d_phys += PAGE_SIZE;
+			n_pt -= PTRS_PER_PMD;
+			early_memunmap(pmd, PAGE_SIZE);
+			make_lowmem_page_readonly(__va(pmd_phys));
+			pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+					PFN_DOWN(pmd_phys));
+			set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+			pmd_phys += PAGE_SIZE;
 		}
-	} while (++idx_p4d < n_p4d);
+		n_pmd -= PTRS_PER_PUD;
+		early_memunmap(pud, PAGE_SIZE);
+		make_lowmem_page_readonly(__va(pud_phys));
+		pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+		set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+		pud_phys += PAGE_SIZE;
+	}
 
 	/* Now copy the old p2m info to the new area. */
 	memcpy(new_p2m, xen_p2m_addr, size);
@@ -2386,7 +2347,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	pv_mmu_ops.set_p4d = xen_set_p4d;
 #endif
 
@@ -2396,7 +2357,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
 	pv_mmu_ops.release_pte = xen_release_pte;
 	pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	pv_mmu_ops.alloc_pud = xen_alloc_pud;
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
@@ -2460,14 +2421,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
 	.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
 
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_p4d = xen_set_p4d_hyper,
 
 	.alloc_pud = xen_alloc_pmd_init,
 	.release_pud = xen_release_pmd_init,
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+#endif /* CONFIG_X86_64 */
 
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
-- 
2.14.2

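The simplification above relies on the p4d level being "folded" when 5-level paging is out of the picture: a 4-level walk sees exactly one p4d entry per pgd entry, so the per-index loop the patch removes is dead. A hedged standalone sketch of that folding, with simplified stand-in types rather than the kernel's:

#include <stdio.h>

#define PTRS_PER_P4D 1	/* folded level on 4-level x86-64 */

typedef struct { unsigned long val; } pgd_t;
typedef struct { unsigned long val; } p4d_t;

/* With a folded level, the "offset" helper just reinterprets the pgd
 * entry; there is nothing to index. */
static p4d_t *p4d_offset(pgd_t *pgd, unsigned long addr)
{
	(void)addr;
	return (p4d_t *)pgd;
}

int main(void)
{
	pgd_t pgd = { 0x1234 };
	p4d_t *p4d = p4d_offset(&pgd, 0);

	/* A loop over PTRS_PER_P4D runs exactly once, which is why the
	 * patch can replace it with a single-entry check. */
	for (int i = 0; i < PTRS_PER_P4D; i++)
		printf("p4d[%d].val = %#lx\n", i, p4d[i].val);
	return 0;
}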
@ -0,0 +1,88 @@
From 8edefc098e2ff088d8c22d034bf8a5adf76b7295 Mon Sep 17 00:00:00 2001
From: Dongjiu Geng <gengdongjiu@huawei.com>
Date: Tue, 17 Oct 2017 16:02:20 +0800
Subject: [PATCH 078/231] ACPI / APEI: remove the unused dead-code for SEA/NMI
 notification type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

For the SEA notification, the two functions ghes_sea_add() and
ghes_sea_remove() are only called when CONFIG_ACPI_APEI_SEA
is defined. If not, it will return errors in the ghes_probe()
and not continue. If the probe is failed, the ghes_sea_remove()
also has no chance to be called. Hence, remove the unnecessary
handling when CONFIG_ACPI_APEI_SEA is not defined.

For the NMI notification, it has the same issue as SEA notification,
so also remove the unused dead-code for it.

Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
Tested-by: Tyler Baicar <tbaicar@codeaurora.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit c49870e89f4d2c21c76ebe90568246bb0f3572b7)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 55f73c32ba6438e8886f348722d2b25aef129d40)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 drivers/acpi/apei/ghes.c | 33 +++++----------------------------
 1 file changed, 5 insertions(+), 28 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3628078ee351..4827176f838d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -850,17 +850,8 @@ static void ghes_sea_remove(struct ghes *ghes)
 	synchronize_rcu();
 }
 #else /* CONFIG_ACPI_APEI_SEA */
-static inline void ghes_sea_add(struct ghes *ghes)
-{
-	pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
-			ghes->generic->header.source_id);
-}
-
-static inline void ghes_sea_remove(struct ghes *ghes)
-{
-	pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
-			ghes->generic->header.source_id);
-}
+static inline void ghes_sea_add(struct ghes *ghes) { }
+static inline void ghes_sea_remove(struct ghes *ghes) { }
 #endif /* CONFIG_ACPI_APEI_SEA */
 
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
@@ -1062,23 +1053,9 @@ static void ghes_nmi_init_cxt(void)
 	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
 }
 #else /* CONFIG_HAVE_ACPI_APEI_NMI */
-static inline void ghes_nmi_add(struct ghes *ghes)
-{
-	pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
-			ghes->generic->header.source_id);
-	BUG();
-}
-
-static inline void ghes_nmi_remove(struct ghes *ghes)
-{
-	pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
-			ghes->generic->header.source_id);
-	BUG();
-}
-
-static inline void ghes_nmi_init_cxt(void)
-{
-}
+static inline void ghes_nmi_add(struct ghes *ghes) { }
+static inline void ghes_nmi_remove(struct ghes *ghes) { }
+static inline void ghes_nmi_init_cxt(void) { }
 #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
 
 static int ghes_probe(struct platform_device *ghes_dev)
-- 
2.14.2

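The patch settles on a common kernel idiom: when a feature is compiled out, its hooks become empty inline stubs, so callers never need #ifdefs of their own. A hedged, self-contained sketch of that idiom (names here are illustrative, not from the kernel tree):

#include <stdio.h>

/* #define CONFIG_FEATURE_X */	/* toggle to compare both builds */

#ifdef CONFIG_FEATURE_X
static inline void feature_x_add(int source_id)
{
	printf("registering feature X for source %d\n", source_id);
}
#else
/* Compiled out: a silent no-op, since the caller can never reach a
 * state where complaining (or BUG()) would be useful. */
static inline void feature_x_add(int source_id) { (void)source_id; }
#endif

int main(void)
{
	feature_x_add(42);	/* call site is identical either way */
	return 0;
}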
@ -0,0 +1,78 @@
From 05096d194a52721b3f4add5f854fc62296b82e72 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 20 Oct 2017 11:21:35 -0500
Subject: [PATCH 079/231] x86/asm: Don't use the confusing '.ifeq' directive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

I find the '.ifeq <expression>' directive to be confusing. Reading it
quickly seems to suggest its opposite meaning, or that it's missing an
argument.

Improve readability by replacing all of its x86 uses with
'.if <expression> == 0'.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrei Vagin <avagin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/757da028e802c7e98d23fbab8d234b1063e161cf.1508516398.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 82c62fa0c49aa305104013cee4468772799bb391)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 981dedac1061fb47d0b04e07f6752be195d7e41a)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/entry/entry_64.S | 2 +-
 arch/x86/kernel/head_32.S | 2 +-
 arch/x86/kernel/head_64.S | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 2e4fc6425f47..34adfe0221d2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -830,7 +830,7 @@ ENTRY(\sym)
 
 	ASM_CLAC
 
-	.ifeq \has_error_code
+	.if \has_error_code == 0
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
 	.endif
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 1f85ee8f9439..337a65377baf 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -435,7 +435,7 @@ ENTRY(early_idt_handler_array)
 	# 24(%rsp) error code
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+	.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
 	pushl $0		# Dummy error code, to make stack frame uniform
 	.endif
 	pushl $i		# 20(%esp) Vector number
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 12daaa0b187f..a2d8541b1da4 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -258,7 +258,7 @@ ENDPROC(start_cpu0)
 ENTRY(early_idt_handler_array)
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+	.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
 	UNWIND_HINT_IRET_REGS
 	pushq $0	# Dummy error code, to make stack frame uniform
 	.else
-- 
2.14.2

@ -0,0 +1,62 @@
From 183c7a0eddfea6359e977cc5216972b4cc875e0d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 27 Oct 2017 13:11:10 +0900
Subject: [PATCH 080/231] x86/build: Beautify build log of syscall headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This makes the build log look nicer.

Before:
  SYSTBL  arch/x86/entry/syscalls/../../include/generated/asm/syscalls_32.h
  SYSHDR  arch/x86/entry/syscalls/../../include/generated/asm/unistd_32_ia32.h
  SYSHDR  arch/x86/entry/syscalls/../../include/generated/asm/unistd_64_x32.h
  SYSTBL  arch/x86/entry/syscalls/../../include/generated/asm/syscalls_64.h
  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_32.h
  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_64.h
  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_x32.h

After:
  SYSTBL  arch/x86/include/generated/asm/syscalls_32.h
  SYSHDR  arch/x86/include/generated/asm/unistd_32_ia32.h
  SYSHDR  arch/x86/include/generated/asm/unistd_64_x32.h
  SYSTBL  arch/x86/include/generated/asm/syscalls_64.h
  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_32.h
  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_64.h
  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_x32.h

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: linux-kbuild@vger.kernel.org
Link: http://lkml.kernel.org/r/1509077470-2735-1-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit af8e947079a7dab0480b5d6db6b093fd04b86fc9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d945957924e9b1a469516b4029fd384138c2cb69)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/entry/syscalls/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index 57aa59fd140c..e34c7a931994 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -1,5 +1,5 @@
-out := $(obj)/../../include/generated/asm
-uapi := $(obj)/../../include/generated/uapi/asm
+out := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
-- 
2.14.2

@ -0,0 +1,90 @@
From 32cae4ea1b3927843b18c32e8e1cdfab8a0b2c19 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Sat, 28 Oct 2017 09:30:38 +0800
Subject: [PATCH 081/231] x86/mm/64: Rename the register_page_bootmem_memmap()
 'size' parameter to 'nr_pages'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

register_page_bootmem_memmap()'s 3rd 'size' parameter is named
in a somewhat misleading fashion - rename it to 'nr_pages' which
makes the units of it much clearer.

Meanwhile rename the existing local variable 'nr_pages' to
'nr_pmd_pages', a more expressive name, to avoid conflict with
new function parameter 'nr_pages'.

(Also clean up the unnecessary parentheses in which get_order() is called.)

Signed-off-by: Baoquan He <bhe@redhat.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: akpm@linux-foundation.org
Link: http://lkml.kernel.org/r/1509154238-23250-1-git-send-email-bhe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 15670bfe19905b1dcbb63137f40d718b59d84479)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d73ad1d31ef8a44c6e5977c5123cbaa6d02e2035)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 include/linux/mm.h    |  2 +-
 arch/x86/mm/init_64.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 07630442bbf2..97f6ca707010 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2475,7 +2475,7 @@ void vmemmap_populate_print_last(void);
 void vmemmap_free(unsigned long start, unsigned long end);
 #endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
-		unsigned long size);
+		unsigned long nr_pages);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 136422d7d539..902983c8ea8c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1418,16 +1418,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 
 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
 void register_page_bootmem_memmap(unsigned long section_nr,
-		struct page *start_page, unsigned long size)
+		struct page *start_page, unsigned long nr_pages)
 {
 	unsigned long addr = (unsigned long)start_page;
-	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long end = (unsigned long)(start_page + nr_pages);
 	unsigned long next;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
-	unsigned int nr_pages;
+	unsigned int nr_pmd_pages;
 	struct page *page;
 
 	for (; addr < end; addr = next) {
@@ -1474,9 +1474,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 		if (pmd_none(*pmd))
 			continue;
 
-		nr_pages = 1 << (get_order(PMD_SIZE));
+		nr_pmd_pages = 1 << get_order(PMD_SIZE);
 		page = pmd_page(*pmd);
-		while (nr_pages--)
+		while (nr_pmd_pages--)
 			get_page_bootmem(section_nr, page++,
 					SECTION_INFO);
 	}
-- 
2.14.2

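The rename matters because pointer arithmetic on struct page * scales by sizeof(struct page): 'start_page + n' means "n pages further", not "n bytes further", so a parameter holding a page count must not read like a byte size. A hedged sketch of that scaling (the struct below is a stand-in, not the kernel's struct page):

#include <stdio.h>

struct page { unsigned long flags; void *mapping; };

int main(void)
{
	struct page map[8];
	struct page *start = &map[0];
	unsigned long nr_pages = 4;

	/* end points one past the 4th page: 4 * sizeof(struct page)
	 * bytes beyond start, not 4 bytes. */
	struct page *end = start + nr_pages;

	printf("sizeof(struct page) = %zu\n", sizeof(struct page));
	printf("byte distance       = %zu\n",
	       (size_t)((char *)end - (char *)start));
	return 0;
}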
@ -0,0 +1,87 @@
From 59557ab0237e7474402d4240c55f119a86dadc7d Mon Sep 17 00:00:00 2001
From: Gayatri Kammela <gayatri.kammela@intel.com>
Date: Mon, 30 Oct 2017 18:20:29 -0700
Subject: [PATCH 082/231] x86/cpufeatures: Enable new SSE/AVX/AVX512 CPU
 features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Add a few new SSE/AVX/AVX512 instruction groups/features for enumeration
in /proc/cpuinfo: AVX512_VBMI2, GFNI, VAES, VPCLMULQDQ, AVX512_VNNI,
AVX512_BITALG.

 CPUID.(EAX=7,ECX=0):ECX[bit 6]  AVX512_VBMI2
 CPUID.(EAX=7,ECX=0):ECX[bit 8]  GFNI
 CPUID.(EAX=7,ECX=0):ECX[bit 9]  VAES
 CPUID.(EAX=7,ECX=0):ECX[bit 10] VPCLMULQDQ
 CPUID.(EAX=7,ECX=0):ECX[bit 11] AVX512_VNNI
 CPUID.(EAX=7,ECX=0):ECX[bit 12] AVX512_BITALG

Detailed information of CPUID bits for these features can be found
in the Intel Architecture Instruction Set Extensions and Future Features
Programming Interface document (refer to Table 1-1. and Table 1-2.).
A copy of this document is available at
https://bugzilla.kernel.org/show_bug.cgi?id=197239

Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Ricardo Neri <ricardo.neri@intel.com>
Cc: Yang Zhong <yang.zhong@intel.com>
Cc: bp@alien8.de
Link: http://lkml.kernel.org/r/1509412829-23380-1-git-send-email-gayatri.kammela@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit c128dbfa0f879f8ce7b79054037889b0b2240728)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b29eb29c5aca4708d66fa977db40c779366636a2)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/cpufeatures.h | 6 ++++++
 arch/x86/kernel/cpu/cpuid-deps.c   | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f4e145c4b06f..c465bd6613ed 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -297,6 +297,12 @@
 #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
 #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
 #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index c1d49842a411..c21f22d836ad 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -50,6 +50,12 @@ const static struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
 	{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
 	{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+	{ X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
+	{ X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
+	{ X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
+	{ X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
+	{ X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
+	{ X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
 	{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
 	{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
 	{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
-- 
2.14.2

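The CPUID.(EAX=7,ECX=0):ECX bit positions from the commit message can be probed from user space. A hedged sketch, assuming a GCC or Clang recent enough to ship __get_cpuid_count() in cpuid.h; the bit positions come straight from the commit message above:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 7, subleaf 0: structured extended feature flags. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("AVX512_VBMI2:  %u\n", (ecx >> 6) & 1);
	printf("GFNI:          %u\n", (ecx >> 8) & 1);
	printf("VAES:          %u\n", (ecx >> 9) & 1);
	printf("VPCLMULQDQ:    %u\n", (ecx >> 10) & 1);
	printf("AVX512_VNNI:   %u\n", (ecx >> 11) & 1);
	printf("AVX512_BITALG: %u\n", (ecx >> 12) & 1);
	return 0;
}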
@ -0,0 +1,363 @@
From 9e6bc95ae1c4b92d9838ee8d2ee8b0e65f4e4469 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 27 Oct 2017 13:25:28 -0700
Subject: [PATCH 083/231] x86/mm: Relocate page fault error codes to traps.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Up to this point, only fault.c used the definitions of the page fault error
codes. Thus, it made sense to keep them within such file. Other portions of
code might be interested in those definitions too. For instance, the User-
Mode Instruction Prevention emulation code will use such definitions to
emulate a page fault when it is unable to successfully copy the results
of the emulated instructions to user space.

While relocating the error code enumeration, the prefix X86_ is used to
make it consistent with the rest of the definitions in traps.h. Of course,
code using the enumeration had to be updated as well. No functional changes
were performed.

Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: ricardo.neri@intel.com
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Link: https://lkml.kernel.org/r/1509135945-13762-2-git-send-email-ricardo.neri-calderon@linux.intel.com

(cherry picked from commit 1067f030994c69ca1fba8c607437c8895dcf8509)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit a85a07ab9111e3c78797c20b60a664dbd5db4981)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/traps.h | 18 +++++++++
 arch/x86/mm/fault.c          | 88 +++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 54 deletions(-)

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index feb89dbe359d..8e5bf86f87e5 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -162,4 +162,22 @@ enum {
 	X86_TRAP_IRET = 32,	/* 32, IRET Exception */
 };
 
+/*
+ * Page fault error code bits:
+ *
+ *   bit 0 == 0: no page found	1: protection fault
+ *   bit 1 == 0: read access	1: write access
+ *   bit 2 == 0: kernel-mode access	1: user-mode access
+ *   bit 3 == 1: use of reserved bit detected
+ *   bit 4 == 1: fault was an instruction fetch
+ *   bit 5 == 1: protection keys block access
+ */
+enum x86_pf_error_code {
+	X86_PF_PROT	= 1 << 0,
+	X86_PF_WRITE	= 1 << 1,
+	X86_PF_USER	= 1 << 2,
+	X86_PF_RSVD	= 1 << 3,
+	X86_PF_INSTR	= 1 << 4,
+	X86_PF_PK	= 1 << 5,
+};
 #endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4ee9eb916826..d3a57e7ad311 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -28,26 +28,6 @@
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
 
-/*
- * Page fault error code bits:
- *
- *   bit 0 == 0: no page found	1: protection fault
- *   bit 1 == 0: read access	1: write access
- *   bit 2 == 0: kernel-mode access	1: user-mode access
- *   bit 3 == 1: use of reserved bit detected
- *   bit 4 == 1: fault was an instruction fetch
- *   bit 5 == 1: protection keys block access
- */
-enum x86_pf_error_code {
-
-	PF_PROT		= 1 << 0,
-	PF_WRITE	= 1 << 1,
-	PF_USER		= 1 << 2,
-	PF_RSVD		= 1 << 3,
-	PF_INSTR	= 1 << 4,
-	PF_PK		= 1 << 5,
-};
-
 /*
 * Returns 0 if mmiotrace is disabled, or if the fault is not
 * handled by mmiotrace:
@@ -149,7 +129,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
 	* If it was a exec (instruction fetch) fault on NX page, then
 	* do not ignore the fault:
 	*/
-	if (error_code & PF_INSTR)
+	if (error_code & X86_PF_INSTR)
 		return 0;
 
 	instr = (void *)convert_ip_to_linear(current, regs);
@@ -179,7 +159,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
 	* siginfo so userspace can discover which protection key was set
 	* on the PTE.
 	*
-	* If we get here, we know that the hardware signaled a PF_PK
+	* If we get here, we know that the hardware signaled a X86_PF_PK
 	* fault and that there was a VMA once we got in the fault
 	* handler. It does *not* guarantee that the VMA we find here
 	* was the one that we faulted on.
@@ -204,7 +184,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
 	/*
 	* force_sig_info_fault() is called from a number of
 	* contexts, some of which have a VMA and some of which
-	* do not. The PF_PK handing happens after we have a
+	* do not. The X86_PF_PK handing happens after we have a
 	* valid VMA, so we should never reach this without a
 	* valid VMA.
 	*/
@@ -693,7 +673,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 	if (!oops_may_print())
 		return;
 
-	if (error_code & PF_INSTR) {
+	if (error_code & X86_PF_INSTR) {
 		unsigned int level;
 		pgd_t *pgd;
 		pte_t *pte;
@@ -775,7 +755,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	*/
 	if (current->thread.sig_on_uaccess_err && signal) {
 		tsk->thread.trap_nr = X86_TRAP_PF;
-		tsk->thread.error_code = error_code | PF_USER;
+		tsk->thread.error_code = error_code | X86_PF_USER;
 		tsk->thread.cr2 = address;
 
 		/* XXX: hwpoison faults will set the wrong code. */
@@ -894,7 +874,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	struct task_struct *tsk = current;
 
 	/* User mode accesses just cause a SIGSEGV */
-	if (error_code & PF_USER) {
+	if (error_code & X86_PF_USER) {
 		/*
 		* It's possible to have interrupts off here:
 		*/
@@ -915,7 +895,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	* Instruction fetch faults in the vsyscall page might need
 	* emulation.
 	*/
-	if (unlikely((error_code & PF_INSTR) &&
+	if (unlikely((error_code & X86_PF_INSTR) &&
 			((address & ~0xfff) == VSYSCALL_ADDR))) {
 		if (emulate_vsyscall(regs, address))
 			return;
@@ -928,7 +908,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	* are always protection faults.
 	*/
 	if (address >= TASK_SIZE_MAX)
-		error_code |= PF_PROT;
+		error_code |= X86_PF_PROT;
 
 	if (likely(show_unhandled_signals))
 		show_signal_msg(regs, error_code, address, tsk);
@@ -989,11 +969,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
 
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return false;
-	if (error_code & PF_PK)
+	if (error_code & X86_PF_PK)
 		return true;
 	/* this checks permission keys on the VMA: */
-	if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
-			(error_code & PF_INSTR), foreign))
+	if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+			(error_code & X86_PF_INSTR), foreign))
 		return true;
 	return false;
 }
@@ -1021,7 +1001,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 	int code = BUS_ADRERR;
 
 	/* Kernel mode? Handle exceptions or die: */
-	if (!(error_code & PF_USER)) {
+	if (!(error_code & X86_PF_USER)) {
 		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
 		return;
 	}
@@ -1049,14 +1029,14 @@ static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 		unsigned long address, u32 *pkey, unsigned int fault)
 {
-	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+	if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
 		no_context(regs, error_code, address, 0, 0);
 		return;
 	}
 
 	if (fault & VM_FAULT_OOM) {
 		/* Kernel mode? Handle exceptions or die: */
-		if (!(error_code & PF_USER)) {
+		if (!(error_code & X86_PF_USER)) {
 			no_context(regs, error_code, address,
 					SIGSEGV, SEGV_MAPERR);
 			return;
@@ -1081,16 +1061,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
 {
-	if ((error_code & PF_WRITE) && !pte_write(*pte))
+	if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
 		return 0;
 
-	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+	if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
 		return 0;
 	/*
 	* Note: We do not do lazy flushing on protection key
-	* changes, so no spurious fault will ever set PF_PK.
+	* changes, so no spurious fault will ever set X86_PF_PK.
 	*/
-	if ((error_code & PF_PK))
+	if ((error_code & X86_PF_PK))
 		return 1;
 
 	return 1;
@@ -1136,8 +1116,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	* change, so user accesses are not expected to cause spurious
 	* faults.
 	*/
-	if (error_code != (PF_WRITE | PF_PROT)
-			&& error_code != (PF_INSTR | PF_PROT))
+	if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+			error_code != (X86_PF_INSTR | X86_PF_PROT))
 		return 0;
 
 	pgd = init_mm.pgd + pgd_index(address);
@@ -1197,19 +1177,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	* always an unconditional error and can never result in
 	* a follow-up action to resolve the fault, like a COW.
 	*/
-	if (error_code & PF_PK)
+	if (error_code & X86_PF_PK)
 		return 1;
 
 	/*
 	* Make sure to check the VMA so that we do not perform
-	* faults just to hit a PF_PK as soon as we fill in a
+	* faults just to hit a X86_PF_PK as soon as we fill in a
 	* page.
 	*/
-	if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
-			(error_code & PF_INSTR), foreign))
+	if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+			(error_code & X86_PF_INSTR), foreign))
 		return 1;
 
-	if (error_code & PF_WRITE) {
+	if (error_code & X86_PF_WRITE) {
 		/* write, present and write, not present: */
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
 			return 1;
@@ -1217,7 +1197,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	}
 
 	/* read, present: */
-	if (unlikely(error_code & PF_PROT))
+	if (unlikely(error_code & X86_PF_PROT))
 		return 1;
 
 	/* read, not present: */
@@ -1240,7 +1220,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
 	if (!static_cpu_has(X86_FEATURE_SMAP))
 		return false;
 
-	if (error_code & PF_USER)
+	if (error_code & X86_PF_USER)
 		return false;
 
 	if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1293,7 +1273,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	* protection error (error_code & 9) == 0.
 	*/
 	if (unlikely(fault_in_kernel_space(address))) {
-		if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+		if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
 			if (vmalloc_fault(address) >= 0)
 				return;
 
@@ -1321,7 +1301,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	if (unlikely(kprobes_fault(regs)))
 		return;
 
-	if (unlikely(error_code & PF_RSVD))
+	if (unlikely(error_code & X86_PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
 	if (unlikely(smap_violation(error_code, regs))) {
@@ -1347,7 +1327,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	*/
 	if (user_mode(regs)) {
 		local_irq_enable();
-		error_code |= PF_USER;
+		error_code |= X86_PF_USER;
 		flags |= FAULT_FLAG_USER;
 	} else {
 		if (regs->flags & X86_EFLAGS_IF)
@@ -1356,9 +1336,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
-	if (error_code & PF_WRITE)
+	if (error_code & X86_PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
-	if (error_code & PF_INSTR)
+	if (error_code & X86_PF_INSTR)
 		flags |= FAULT_FLAG_INSTRUCTION;
 
 	/*
@@ -1378,7 +1358,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	* space check, thus avoiding the deadlock:
 	*/
 	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
-		if ((error_code & PF_USER) == 0 &&
+		if (!(error_code & X86_PF_USER) &&
 				!search_exception_tables(regs->ip)) {
 			bad_area_nosemaphore(regs, error_code, address, NULL);
 			return;
@@ -1405,7 +1385,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 		bad_area(regs, error_code, address);
 		return;
 	}
-	if (error_code & PF_USER) {
+	if (error_code & X86_PF_USER) {
 		/*
 		* Accessing the stack below %sp is always a bug.
 		* The large cushion allows instructions like enter
-- 
2.14.2

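The relocated flags are plain bit masks, so a fault's error code can be decoded by testing each bit. A hedged sketch: the enum mirrors the patch verbatim, while the decode function and sample values are illustrative only:

#include <stdio.h>

enum x86_pf_error_code {
	X86_PF_PROT	= 1 << 0,
	X86_PF_WRITE	= 1 << 1,
	X86_PF_USER	= 1 << 2,
	X86_PF_RSVD	= 1 << 3,
	X86_PF_INSTR	= 1 << 4,
	X86_PF_PK	= 1 << 5,
};

static void decode_pf(unsigned long err)
{
	printf("%s %s fault, %s mode%s%s%s\n",
	       err & X86_PF_PROT  ? "protection" : "not-present",
	       err & X86_PF_WRITE ? "write" : "read",
	       err & X86_PF_USER  ? "user" : "kernel",
	       err & X86_PF_RSVD  ? ", reserved bit set" : "",
	       err & X86_PF_INSTR ? ", instruction fetch" : "",
	       err & X86_PF_PK    ? ", protection key" : "");
}

int main(void)
{
	decode_pf(X86_PF_USER | X86_PF_WRITE);	/* user write to unmapped page */
	decode_pf(X86_PF_PROT | X86_PF_INSTR);	/* e.g. an NX violation */
	return 0;
}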
@ -0,0 +1,104 @@
From e0cef0182f7d13edb48119653a4fc225b0287b5a Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 27 Oct 2017 13:25:29 -0700
Subject: [PATCH 084/231] x86/boot: Relocate definition of the initial state of
 CR0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Both head_32.S and head_64.S utilize the same value to initialize the
control register CR0. Also, other parts of the kernel might want to access
this initial definition (e.g., emulation code for User-Mode Instruction
Prevention uses this state to provide a sane dummy value for CR0 when
emulating the smsw instruction). Thus, relocate this definition to a
header file from which it can be conveniently accessed.

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: ricardo.neri@intel.com
Cc: linux-mm@kvack.org
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-arch@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/1509135945-13762-3-git-send-email-ricardo.neri-calderon@linux.intel.com

(cherry picked from commit b0ce5b8c95c83a7b98c679b117e3d6ae6f97154b)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 27c31a88c22edab269abe17c0ac7db0351d26c5f)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/uapi/asm/processor-flags.h | 3 +++
 arch/x86/kernel/head_32.S                   | 3 ---
 arch/x86/kernel/head_64.S                   | 3 ---
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 185f3d10c194..39946d0a1d41 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -151,5 +151,8 @@
 #define CX86_ARR_BASE	0xc4
 #define CX86_RCR_BASE	0xdc
 
+#define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+			 X86_CR0_PG)
 
 #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 337a65377baf..7bbcdb1ea31a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -213,9 +213,6 @@ ENTRY(startup_32_smp)
 #endif
 
 .Ldefault_entry:
-#define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
-			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
-			 X86_CR0_PG)
 	movl $(CR0_STATE & ~X86_CR0_PG),%eax
 	movl %eax,%cr0
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a2d8541b1da4..4117c1e0b3d2 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -137,9 +137,6 @@ ENTRY(secondary_startup_64)
1:	wrmsr				/* Make changes effective */
 
 	/* Setup cr0 */
-#define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
-			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
-			 X86_CR0_PG)
 	movl	$CR0_STATE, %eax
 	/* Make changes effective */
 	movq	%rax, %cr0
-- 
2.14.2

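For reference, the bits composed into CR0_STATE can be checked numerically. A hedged sketch using the architectural CR0 bit positions from the Intel SDM; the macro shape mirrors the patch, the print-out is illustrative:

#include <stdio.h>

#define X86_CR0_PE (1UL << 0)	/* Protection Enable */
#define X86_CR0_MP (1UL << 1)	/* Monitor Coprocessor */
#define X86_CR0_ET (1UL << 4)	/* Extension Type */
#define X86_CR0_NE (1UL << 5)	/* Numeric Error */
#define X86_CR0_WP (1UL << 16)	/* Write Protect */
#define X86_CR0_AM (1UL << 18)	/* Alignment Mask */
#define X86_CR0_PG (1UL << 31)	/* Paging */

#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
		   X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
		   X86_CR0_PG)

int main(void)
{
	/* head_32.S enables everything except paging first, hence
	 * the CR0_STATE & ~X86_CR0_PG form in the hunk above. */
	printf("CR0_STATE       = %#lx\n", CR0_STATE);
	printf("CR0_STATE & ~PG = %#lx\n", CR0_STATE & ~X86_CR0_PG);
	return 0;
}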
@ -0,0 +1,93 @@
From 34b5c16ae093e5663c398c87569793bfbec1c7ca Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 27 Oct 2017 13:25:30 -0700
Subject: [PATCH 085/231] ptrace,x86: Make user_64bit_mode() available to
 32-bit builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

In its current form, user_64bit_mode() can only be used when CONFIG_X86_64
is selected. This implies that code built with CONFIG_X86_64=n cannot use
it. If a piece of code needs to be built for both CONFIG_X86_64=y and
CONFIG_X86_64=n and wants to use this function, it needs to wrap it in
an #ifdef/#endif; potentially, in multiple places.

This can be easily avoided with a single #ifdef/#endif pair within
user_64bit_mode() itself.

Suggested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: ricardo.neri@intel.com
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Adam Buchbinder <adam.buchbinder@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Garnier <thgarnie@google.com>
Link: https://lkml.kernel.org/r/1509135945-13762-4-git-send-email-ricardo.neri-calderon@linux.intel.com

(cherry picked from commit e27c310af5c05cf876d9cad006928076c27f54d4)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 20ddf08f867d3d96788299cd2fb7676590d64250)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/ptrace.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 2b5d686ea9f3..ea78a8438a8a 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -115,9 +115,9 @@ static inline int v8086_mode(struct pt_regs *regs)
 #endif
 }
 
-#ifdef CONFIG_X86_64
 static inline bool user_64bit_mode(struct pt_regs *regs)
 {
+#ifdef CONFIG_X86_64
 #ifndef CONFIG_PARAVIRT
 	/*
 	* On non-paravirt systems, this is the only long mode CPL 3
@@ -128,8 +128,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 	/* Headers are too twisted for this to go in paravirt.h. */
 	return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
 #endif
+#else /* !CONFIG_X86_64 */
+	return false;
+#endif
 }
 
+#ifdef CONFIG_X86_64
 #define current_user_stack_pointer()	current_pt_regs()->sp
 #define compat_user_stack_pointer()	current_pt_regs()->sp
 #endif
-- 
2.14.2

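The idiom here is worth isolating: keep one definition visible to every configuration and put the #ifdef inside it, so call sites never need guards of their own. A hedged standalone sketch (CONFIG_X86_64 is toggled by hand here purely for illustration):

#include <stdbool.h>
#include <stdio.h>

/* #define CONFIG_X86_64 */	/* toggle to compare both builds */

static inline bool user_64bit_mode_demo(void)
{
#ifdef CONFIG_X86_64
	return true;		/* the real code inspects pt_regs here */
#else
	return false;		/* 32-bit build: trivially false */
#endif
}

int main(void)
{
	/* The call site compiles unchanged in both configurations. */
	printf("user_64bit_mode: %d\n", user_64bit_mode_demo());
	return 0;
}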
@ -0,0 +1,75 @@
From befef5ef70f959cd51694298c4370557e5d846cf Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:58:58 -0700
Subject: [PATCH 086/231] x86/entry/64: Remove the restore_c_regs_and_iret
 label
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The only user was the 64-bit opportunistic SYSRET failure path, and
that path didn't really need it. This change makes the
opportunistic SYSRET code a bit more straightforward and gets rid of
the label.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/be3006a7ad3326e3458cf1cc55d416252cbe1986.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 9da78ba6b47b46428cfdfc0851511ab29c869798)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 629c8b858cbe72e88e7f44a8f10e1b434ab80721)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/entry/entry_64.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 34adfe0221d2..fac354ddf056 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -245,7 +245,6 @@ entry_SYSCALL64_slow_path:
 	call	do_syscall_64		/* returns with IRQs disabled */
 
 return_from_SYSCALL_64:
-	RESTORE_EXTRA_REGS
 	TRACE_IRQS_IRETQ		/* we're about to change IF */
 
 	/*
@@ -314,6 +313,7 @@ return_from_SYSCALL_64:
 	*/
 syscall_return_via_sysret:
 	/* rcx and r11 are already restored (see code above) */
+	RESTORE_EXTRA_REGS
 	RESTORE_C_REGS_EXCEPT_RCX_R11
 	movq	RSP(%rsp), %rsp
 	UNWIND_HINT_EMPTY
@@ -321,7 +321,7 @@ syscall_return_via_sysret:
 
 opportunistic_sysret_failed:
 	SWAPGS
-	jmp	restore_c_regs_and_iret
+	jmp	restore_regs_and_iret
 END(entry_SYSCALL_64)
 
 ENTRY(stub_ptregs_64)
@@ -638,7 +638,6 @@ retint_kernel:
 	*/
 GLOBAL(restore_regs_and_iret)
 	RESTORE_EXTRA_REGS
-restore_c_regs_and_iret:
 	RESTORE_C_REGS
 	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
-- 
2.14.2

@ -0,0 +1,135 @@
From 6c390918ecf72524840bc174fb5f9d007db5a9a8 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:58:59 -0700
Subject: [PATCH 087/231] x86/entry/64: Split the IRET-to-user and
IRET-to-kernel paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

These code paths will diverge soon.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/dccf8c7b3750199b4b30383c812d4e2931811509.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 64adfba0aeb668304d171c383ac80b22158ec128)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 34 +++++++++++++++++++++++++---------
arch/x86/entry/entry_64_compat.S | 2 +-
arch/x86/kernel/head_64.S | 2 +-
3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index fac354ddf056..e546441fbec3 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -321,7 +321,7 @@ syscall_return_via_sysret:

opportunistic_sysret_failed:
SWAPGS
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_usermode
END(entry_SYSCALL_64)

ENTRY(stub_ptregs_64)
@@ -423,7 +423,7 @@ ENTRY(ret_from_fork)
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
SWAPGS
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_usermode

1:
/* kernel thread */
@@ -612,7 +612,20 @@ GLOBAL(retint_user)
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
SWAPGS
- jmp restore_regs_and_iret
+
+GLOBAL(restore_regs_and_return_to_usermode)
+#ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates user mode. */
+ testl $3, CS(%rsp)
+ jnz 1f
+ ud2
+1:
+#endif
+ RESTORE_EXTRA_REGS
+ RESTORE_C_REGS
+ REMOVE_PT_GPREGS_FROM_STACK 8
+ INTERRUPT_RETURN
+

/* Returning to kernel space */
retint_kernel:
@@ -632,11 +645,14 @@ retint_kernel:
*/
TRACE_IRQS_IRETQ

-/*
- * At this label, code paths which return to kernel and to user,
- * which come from interrupts/exception and from syscalls, merge.
- */
-GLOBAL(restore_regs_and_iret)
+GLOBAL(restore_regs_and_return_to_kernel)
+#ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates kernel mode. */
+ testl $3, CS(%rsp)
+ jz 1f
+ ud2
+1:
+#endif
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1340,7 +1356,7 @@ ENTRY(nmi)
* work, because we don't want to enable interrupts.
*/
SWAPGS
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_usermode

.Lnmi_from_kernel:
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d8468ba24be0..2b3a88feaa2b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -337,7 +337,7 @@ ENTRY(entry_INT80_compat)
/* Go back to user mode. */
TRACE_IRQS_ON
SWAPGS
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_usermode
END(entry_INT80_compat)

ALIGN
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 4117c1e0b3d2..e785734980ad 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -311,7 +311,7 @@ early_idt_handler_common:

20:
decl early_recursion_flag(%rip)
- jmp restore_regs_and_iret
+ jmp restore_regs_and_return_to_kernel
END(early_idt_handler_common)

__INITDATA
--
2.14.2

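The new CONFIG_DEBUG_ENTRY assertions above hinge on one x86 fact: the low two bits of a CS selector are its privilege level, so a saved CS with those bits set means the frame will IRET back to CPL 3, i.e. user mode. A hedged C rendering of the predicate that "testl $3, CS(%rsp)" evaluates (the helper name is invented for illustration and is not part of the patch):

/*
 * Illustrative only -- mirrors the "testl $3, CS(%rsp)" assertions:
 * a nonzero RPL in the saved CS selector marks a frame that returns
 * to user mode.
 */
static inline int frame_returns_to_user(unsigned long saved_cs)
{
	return (saved_cs & 3) != 0;
}
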
@ -0,0 +1,156 @@
From 271bc7d0577bef9f344187eb45ba8682eed242f9 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:00 -0700
Subject: [PATCH 088/231] x86/entry/64: Move SWAPGS into the common
IRET-to-usermode path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

All of the code paths that ended up doing IRET to usermode did
SWAPGS immediately beforehand. Move the SWAPGS into the common
code.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/27fd6f45b7cd640de38fb9066fd0349bcd11f8e1.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 8a055d7f411d41755ce30db5bb65b154777c4b78)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 62a85594f9be3baeb2495089f1c2980bc497d03b)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 32 ++++++++++++++------------------
arch/x86/entry/entry_64_compat.S | 3 +--
2 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index e546441fbec3..7c8258e3ad2d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -249,12 +249,14 @@ return_from_SYSCALL_64:

/*
* Try to use SYSRET instead of IRET if we're returning to
- * a completely clean 64-bit userspace context.
+ * a completely clean 64-bit userspace context. If we're not,
+ * go to the slow exit path.
*/
movq RCX(%rsp), %rcx
movq RIP(%rsp), %r11
- cmpq %rcx, %r11 /* RCX == RIP */
- jne opportunistic_sysret_failed
+
+ cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
+ jne swapgs_restore_regs_and_return_to_usermode

/*
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -272,14 +274,14 @@ return_from_SYSCALL_64:

/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode

cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode

movq R11(%rsp), %r11
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode

/*
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
@@ -300,12 +302,12 @@ return_from_SYSCALL_64:
* would never get past 'stuck_here'.
*/
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz opportunistic_sysret_failed
+ jnz swapgs_restore_regs_and_return_to_usermode

/* nothing to check for RSP */

cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode

/*
* We win! This label is here just for ease of understanding
@@ -318,10 +320,6 @@ syscall_return_via_sysret:
movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
USERGS_SYSRET64
-
-opportunistic_sysret_failed:
- SWAPGS
- jmp restore_regs_and_return_to_usermode
END(entry_SYSCALL_64)

ENTRY(stub_ptregs_64)
@@ -422,8 +420,7 @@ ENTRY(ret_from_fork)
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode

1:
/* kernel thread */
@@ -611,9 +608,8 @@ GLOBAL(retint_user)
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
- SWAPGS

-GLOBAL(restore_regs_and_return_to_usermode)
+GLOBAL(swapgs_restore_regs_and_return_to_usermode)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testl $3, CS(%rsp)
@@ -621,6 +617,7 @@ GLOBAL(restore_regs_and_return_to_usermode)
ud2
1:
#endif
+ SWAPGS
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1355,8 +1352,7 @@ ENTRY(nmi)
* Return back to user mode. We must *not* do the normal exit
* work, because we don't want to enable interrupts.
*/
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode

.Lnmi_from_kernel:
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 2b3a88feaa2b..be745b7a3e3e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -336,8 +336,7 @@ ENTRY(entry_INT80_compat)

/* Go back to user mode. */
TRACE_IRQS_ON
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)

ALIGN
--
2.14.2

@ -0,0 +1,103 @@
From 958fcb45b64535b87e3cfaef15a5cb41595e4187 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:01 -0700
Subject: [PATCH 089/231] x86/entry/64: Simplify reg restore code in the
standard IRET paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The old code restored all the registers with movq instead of pop.

In theory, this was done because some CPUs have higher movq
throughput, but any gain there would be tiny and is almost certainly
outweighed by the higher text size.

This saves 96 bytes of text.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/ad82520a207ccd851b04ba613f4f752b33ac05f7.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit e872045bfd9c465a8555bab4b8567d56a4d2d3bb)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit f926575cd370de4052e89477582b349af5664a56)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/calling.h | 21 +++++++++++++++++++++
arch/x86/entry/entry_64.S | 12 ++++++------
2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 640aafebdc00..0b9dd8123701 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -151,6 +151,27 @@ For 32-bit we have the following conventions - kernel is built with
UNWIND_HINT_REGS offset=\offset extra=0
.endm

+ .macro POP_EXTRA_REGS
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ .endm
+
+ .macro POP_C_REGS
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ .endm
+
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
movq 6*8(%rsp), %r11
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 7c8258e3ad2d..a1a86e782a0e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -618,9 +618,9 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
1:
#endif
SWAPGS
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
+ POP_EXTRA_REGS
+ POP_C_REGS
+ addq $8, %rsp /* skip regs->orig_ax */
INTERRUPT_RETURN


@@ -650,9 +650,9 @@ GLOBAL(restore_regs_and_return_to_kernel)
ud2
1:
#endif
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
+ POP_EXTRA_REGS
+ POP_C_REGS
+ addq $8, %rsp /* skip regs->orig_ax */
INTERRUPT_RETURN

ENTRY(native_iret)
--
2.14.2

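The pop sequences above only work because they consume the GP-register save area in exactly the order struct pt_regs lays it out, from the lowest address (where %rsp points) upward. A hedged sketch of that correspondence in C, with the field order taken from the x86-64 struct pt_regs (the struct name here is invented for illustration):

/*
 * Illustrative layout sketch, not from the patch.  Each popq in
 * POP_EXTRA_REGS/POP_C_REGS advances %rsp by 8 through these fields:
 */
struct gpregs_on_stack {
	unsigned long r15, r14, r13, r12, rbp, rbx;	/* POP_EXTRA_REGS */
	unsigned long r11, r10, r9, r8;			/* POP_C_REGS ... */
	unsigned long rax, rcx, rdx, rsi, rdi;		/* ... continued */
	unsigned long orig_ax;	/* skipped by "addq $8, %rsp" */
	/* hardware frame (rip, cs, eflags, rsp, ss) follows for iretq */
};
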
@ -0,0 +1,70 @@
From d8fdea47d7fc1177aa0843a49dc89422ac6f4fea Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:02 -0700
Subject: [PATCH 090/231] x86/entry/64: Shrink paranoid_exit_restore and make
labels local
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

paranoid_exit_restore was a copy of restore_regs_and_return_to_kernel.
Merge them and make the paranoid_exit internal labels local.

Keeping .Lparanoid_exit makes the code a bit shorter because it
allows a 2-byte jnz instead of a 5-byte jnz.

Saves 96 bytes of text.

( This is still a bit suboptimal in a non-CONFIG_TRACE_IRQFLAGS
kernel, but fixing that would make the code rather messy. )

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/510d66a1895cda9473c84b1086f0bb974f22de6a.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit e53178328c9b96fbdbc719e78c93b5687ee007c3)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit fb53fe10add935c3d0eb63199e43426eaf3b4299)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a1a86e782a0e..6995f7e08aa1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1136,17 +1136,14 @@ ENTRY(paranoid_exit)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
testl %ebx, %ebx /* swapgs needed? */
- jnz paranoid_exit_no_swapgs
+ jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
SWAPGS_UNSAFE_STACK
- jmp paranoid_exit_restore
-paranoid_exit_no_swapgs:
+ jmp .Lparanoid_exit_restore
+.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
- INTERRUPT_RETURN
+.Lparanoid_exit_restore:
+ jmp restore_regs_and_return_to_kernel
END(paranoid_exit)

/*
--
2.14.2

@ -0,0 +1,61 @@
From d4b1f2361974bfffa04a528fb2ad393a55d13841 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:03 -0700
Subject: [PATCH 091/231] x86/entry/64: Use pop instead of movq in
syscall_return_via_sysret
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Saves 64 bytes.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/6609b7f74ab31c36604ad746e019ea8495aec76c.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 4fbb39108f972437c44e5ffa781b56635d496826)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 1e9a9d5ef9f65eeb26eb8f0974dd3e693894baf1)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6995f7e08aa1..33a416c7df2d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -315,10 +315,18 @@ return_from_SYSCALL_64:
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
+ POP_EXTRA_REGS
+ popq %rsi /* skip r11 */
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rsi /* skip rcx */
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET64
END(entry_SYSCALL_64)

--
2.14.2

@ -0,0 +1,60 @@
From c48697564de6da427f1e97a38192f4d456223942 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:04 -0700
Subject: [PATCH 092/231] x86/entry/64: Merge the fast and slow SYSRET paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

They did almost the same thing. Remove a bunch of pointless
instructions (mostly hidden in macros) and reduce cognitive load by
merging them.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1204e20233fcab9130a1ba80b3b1879b5db3fc1f.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit a512210643da8082cb44181dba8b18e752bd68f0)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 7c4575d8bb2d01960ba9b9840fa22460e0179eca)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 33a416c7df2d..87be1cd1fa88 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -220,10 +220,9 @@ entry_SYSCALL_64_fastpath:
TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
+ addq $6*8, %rsp /* skip extra regs -- they were preserved */
UNWIND_HINT_EMPTY
- USERGS_SYSRET64
+ jmp .Lpop_c_regs_except_rcx_r11_and_sysret

1:
/*
@@ -317,6 +316,7 @@ syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
POP_EXTRA_REGS
+.Lpop_c_regs_except_rcx_r11_and_sysret:
popq %rsi /* skip r11 */
popq %r10
popq %r9
--
2.14.2

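The fast path can afford the bare addq because the six callee-saved registers (rbx, rbp, r12-r15) were never clobbered across the C call, exactly as the "-- they were preserved" comment says: their pt_regs slots are simply skipped rather than popped before jumping into the shared pop sequence. A quick check of the arithmetic, as an illustrative sketch (the constant names are invented, not kernel identifiers):

/*
 * Illustrative only: why "addq $6*8, %rsp" is safe on the fast path.
 * The six callee-saved GPR slots at the bottom of pt_regs need not be
 * reloaded, just skipped: 6 slots * 8 bytes = 48 bytes.
 */
enum { CALLEE_SAVED_SLOTS = 6, GPR_SLOT_BYTES = 8 };
enum { SKIPPED_BYTES = CALLEE_SAVED_SLOTS * GPR_SLOT_BYTES };	/* 48 */
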
@ -0,0 +1,57 @@
From c801c4e1ba695ba230e97f626abaeb0c16393b09 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:05 -0700
Subject: [PATCH 093/231] x86/entry/64: Use POP instead of MOV to restore regs
on NMI return
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This gets rid of the last user of the old RESTORE_..._REGS infrastructure.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/652a260f17a160789bc6a41d997f98249b73e2ab.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 471ee4832209e986029b9fabdaad57b1eecb856b)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 3c5771a43d8f00e53081871027fea891a091ff5e)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 87be1cd1fa88..4eff3aca54ed 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1572,11 +1572,14 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
+ POP_EXTRA_REGS
+ POP_C_REGS

- /* Point RSP at the "iret" frame. */
- REMOVE_PT_GPREGS_FROM_STACK 6*8
+ /*
+ * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
+ * frame.
+ */
+ addq $6*8, %rsp

/*
* Clear "NMI executing". Set DF first so that we can easily
--
2.14.2

@ -0,0 +1,105 @@
From 8837585aa116d1aa832e524442a1e9953d17a196 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:06 -0700
Subject: [PATCH 094/231] x86/entry/64: Remove the RESTORE_..._REGS
infrastructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

All users of RESTORE_EXTRA_REGS, RESTORE_C_REGS and such, and
REMOVE_PT_GPREGS_FROM_STACK are gone. Delete the macros.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c32672f6e47c561893316d48e06c7656b1039a36.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit c39858de696f0cc160a544455e8403d663d577e9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit d248c62028c5467cd5a5ce06d344e3fb330da3ec)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/calling.h | 52 ------------------------------------------------
1 file changed, 52 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 0b9dd8123701..1895a685d3dd 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -141,16 +141,6 @@ For 32-bit we have the following conventions - kernel is built with
UNWIND_HINT_REGS offset=\offset
.endm

- .macro RESTORE_EXTRA_REGS offset=0
- movq 0*8+\offset(%rsp), %r15
- movq 1*8+\offset(%rsp), %r14
- movq 2*8+\offset(%rsp), %r13
- movq 3*8+\offset(%rsp), %r12
- movq 4*8+\offset(%rsp), %rbp
- movq 5*8+\offset(%rsp), %rbx
- UNWIND_HINT_REGS offset=\offset extra=0
- .endm
-
.macro POP_EXTRA_REGS
popq %r15
popq %r14
@@ -172,48 +162,6 @@ For 32-bit we have the following conventions - kernel is built with
popq %rdi
.endm

- .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
- .if \rstor_r11
- movq 6*8(%rsp), %r11
- .endif
- .if \rstor_r8910
- movq 7*8(%rsp), %r10
- movq 8*8(%rsp), %r9
- movq 9*8(%rsp), %r8
- .endif
- .if \rstor_rax
- movq 10*8(%rsp), %rax
- .endif
- .if \rstor_rcx
- movq 11*8(%rsp), %rcx
- .endif
- .if \rstor_rdx
- movq 12*8(%rsp), %rdx
- .endif
- movq 13*8(%rsp), %rsi
- movq 14*8(%rsp), %rdi
- UNWIND_HINT_IRET_REGS offset=16*8
- .endm
- .macro RESTORE_C_REGS
- RESTORE_C_REGS_HELPER 1,1,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RAX
- RESTORE_C_REGS_HELPER 0,1,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RCX
- RESTORE_C_REGS_HELPER 1,0,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_R11
- RESTORE_C_REGS_HELPER 1,1,0,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RCX_R11
- RESTORE_C_REGS_HELPER 1,0,0,1,1
- .endm
-
- .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
- subq $-(15*8+\addskip), %rsp
- .endm
-
.macro icebp
.byte 0xf1
.endm
--
2.14.2

@ -0,0 +1,105 @@
From 47b64e9de8bba4e6ccd0976bce6cf99446daf82e Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 2 Nov 2017 00:59:07 -0700
Subject: [PATCH 095/231] xen, x86/entry/64: Add xen NMI trap entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Instead of trying to execute any NMI via the bare metal's NMI trap
handler use a Xen specific one for PV domains, like we do for e.g.
debug traps. As in a PV domain the NMI is handled via the normal
kernel stack this is the correct thing to do.

This will enable us to get rid of the very fragile and questionable
dependencies between the bare metal NMI handler and Xen assumptions
believed to be broken anyway.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/5baf5c0528d58402441550c5770b98e7961e7680.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 43e4111086a70c78bedb6ad990bee97f17b27a6e)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 20c970e03b42141abf6c45938ce6d4fdc3555921)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/traps.h | 2 +-
arch/x86/xen/enlighten_pv.c | 2 +-
arch/x86/entry/entry_64.S | 2 +-
arch/x86/xen/xen-asm_64.S | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 8e5bf86f87e5..b052a7621ca1 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -55,9 +55,9 @@ asmlinkage void simd_coprocessor_error(void);

#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xennmi(void);
asmlinkage void xen_xendebug(void);
asmlinkage void xen_xenint3(void);
-asmlinkage void xen_nmi(void);
asmlinkage void xen_overflow(void);
asmlinkage void xen_bounds(void);
asmlinkage void xen_invalid_op(void);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 69b9deff7e5c..8da4eff19c2a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -600,7 +600,7 @@ static struct trap_array_entry trap_array[] = {
#ifdef CONFIG_X86_MCE
{ machine_check, xen_machine_check, true },
#endif
- { nmi, xen_nmi, true },
+ { nmi, xen_xennmi, true },
{ overflow, xen_overflow, false },
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4eff3aca54ed..5a6aba7cf3bd 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1091,6 +1091,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1

#ifdef CONFIG_XEN
+idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0
idtentry xenint3 do_int3 has_error_code=0
#endif
@@ -1253,7 +1254,6 @@ ENTRY(error_exit)
END(error_exit)

/* Runs on exception stack */
-/* XXX: broken on Xen PV */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
/*
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index dae2cc33afb5..286ecc198562 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -29,7 +29,7 @@ xen_pv_trap debug
xen_pv_trap xendebug
xen_pv_trap int3
xen_pv_trap xenint3
-xen_pv_trap nmi
+xen_pv_trap xennmi
xen_pv_trap overflow
xen_pv_trap bounds
xen_pv_trap invalid_op
--
2.14.2

117
patches/kernel/0096-x86-entry-64-De-Xen-ify-our-NMI-code.patch
Normal file
@ -0,0 +1,117 @@
From 4a112915e611296f0d196bb6cb2baa99af0e9148 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:08 -0700
Subject: [PATCH 096/231] x86/entry/64: De-Xen-ify our NMI code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Xen PV is fundamentally incompatible with our fancy NMI code: it
doesn't use IST at all, and Xen entries clobber two stack slots
below the hardware frame.

Drop Xen PV support from our NMI code entirely.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/bfbe711b5ae03f672f8848999a8eb2711efc7f98.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 929bacec21478a72c78e4f29f98fb799bd00105a)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit ffc372909c1701c4fdd2bde7861692573ef381a7)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/entry/entry_64.S | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5a6aba7cf3bd..05501c781c20 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1253,9 +1253,13 @@ ENTRY(error_exit)
jmp retint_user
END(error_exit)

-/* Runs on exception stack */
+/*
+ * Runs on exception stack. Xen PV does not go through this path at all,
+ * so we can use real assembly here.
+ */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
+
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
@@ -1313,7 +1317,7 @@ ENTRY(nmi)
* stacks lest we corrupt the "NMI executing" variable.
*/

- SWAPGS_UNSAFE_STACK
+ swapgs
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1478,7 +1482,7 @@ nested_nmi_out:
popq %rdx

/* We are returning to kernel mode, so this cannot result in a fault. */
- INTERRUPT_RETURN
+ iretq

first_nmi:
/* Restore rdx. */
@@ -1509,7 +1513,7 @@ first_nmi:
pushfq /* RFLAGS */
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
- INTERRUPT_RETURN /* continues at repeat_nmi below */
+ iretq /* continues at repeat_nmi below */
UNWIND_HINT_IRET_REGS
1:
#endif
@@ -1584,20 +1588,22 @@ nmi_restore:
/*
* Clear "NMI executing". Set DF first so that we can easily
* distinguish the remaining code between here and IRET from
- * the SYSCALL entry and exit paths. On a native kernel, we
- * could just inspect RIP, but, on paravirt kernels,
- * INTERRUPT_RETURN can translate into a jump into a
- * hypercall page.
+ * the SYSCALL entry and exit paths.
+ *
+ * We arguably should just inspect RIP instead, but I (Andy) wrote
+ * this code when I had the misapprehension that Xen PV supported
+ * NMIs, and Xen PV would break that approach.
*/
std
movq $0, 5*8(%rsp) /* clear "NMI executing" */

/*
- * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
- * stack in a single instruction. We are returning to kernel
- * mode, so this cannot result in a fault.
+ * iretq reads the "iret" frame and exits the NMI stack in a
+ * single instruction. We are returning to kernel mode, so this
+ * cannot result in a fault. Similarly, we don't need to worry
+ * about espfix64 on the way back to kernel mode.
*/
- INTERRUPT_RETURN
+ iretq
END(nmi)

ENTRY(ignore_sysret)
--
2.14.2

@ -0,0 +1,145 @@
From f16330a748c8b8db495673108d72fcfc2873d377 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:09 -0700
Subject: [PATCH 097/231] x86/entry/32: Pull the MSR_IA32_SYSENTER_CS update
code out of native_load_sp0()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This causes the MSR_IA32_SYSENTER_CS write to move out of the
paravirt callback. This shouldn't affect Xen PV: Xen already ignores
MSR_IA32_SYSENTER_ESP writes. In any event, Xen doesn't support
vm86() in a useful way.

Note to any potential backporters: This patch won't break lguest, as
lguest didn't have any SYSENTER support at all.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/75cf09fe03ae778532d0ca6c65aa58e66bc2f90c.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit bd7dc5a6afac719d8ce4092391eef2c7e83c2a75)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 779e32d0da9a547f3b11fbecac8287e458ba67f5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/processor.h | 7 -------
arch/x86/include/asm/switch_to.h | 12 ++++++++++++
arch/x86/kernel/process_32.c | 4 +++-
arch/x86/kernel/process_64.c | 2 +-
arch/x86/kernel/vm86_32.c | 6 +++++-
5 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 028245e1c42b..ee37fb86900a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -513,13 +513,6 @@ static inline void
native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
{
tss->x86_tss.sp0 = thread->sp0;
-#ifdef CONFIG_X86_32
- /* Only happens when SEP is enabled, no need to test "SEP"arately: */
- if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
- tss->x86_tss.ss1 = thread->sysenter_cs;
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
- }
-#endif
}

static inline void native_swapgs(void)
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index fcc5cd387fd1..7ae8caffbada 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -72,4 +72,16 @@ do { \
((last) = __switch_to_asm((prev), (next))); \
} while (0)

+#ifdef CONFIG_X86_32
+static inline void refresh_sysenter_cs(struct thread_struct *thread)
+{
+ /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+ if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+ return;
+
+ this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+}
+#endif
+
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 22802162eeb9..2e42b66b8ca4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

/*
* Reload esp0 and cpu_current_top_of_stack. This changes
- * current_thread_info().
+ * current_thread_info(). Refresh the SYSENTER configuration in
+ * case prev or next is vm86.
*/
load_sp0(tss, next);
+ refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 1e7701c4cd80..565daaa6f18d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
this_cpu_write(current_task, next_p);

- /* Reload esp0 and ss1. This changes current_thread_info(). */
+ /* Reload sp0. */
load_sp0(tss, next);

/*
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 7924a5356c8a..5bc1c3ab6287 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -54,6 +54,7 @@
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>
+#include <asm/switch_to.h>

/*
* Known problems:
@@ -149,6 +150,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
load_sp0(tss, &tsk->thread);
+ refresh_sysenter_cs(&tsk->thread);
vm86->saved_sp0 = 0;
put_cpu();

@@ -368,8 +370,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
/* make room for real-mode segments */
tsk->thread.sp0 += 16;

- if (static_cpu_has(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP)) {
tsk->thread.sysenter_cs = 0;
+ refresh_sysenter_cs(&tsk->thread);
+ }

load_sp0(tss, &tsk->thread);
put_cpu();
--
2.14.2

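One detail worth noting in refresh_sysenter_cs() above: wrmsr() takes the 64-bit MSR value as separate low and high halves, and MSR_IA32_SYSENTER_CS only consumes the low word, so the high half is passed as 0. A hedged sketch of the same write, assuming the wrmsr(msr, low, high) helper from <asm/msr.h> (the wrapper name below is invented for illustration):

/*
 * Illustrative only: the MSR write refresh_sysenter_cs() performs.
 * SYSENTER loads CS from the low word of this MSR, so high == 0.
 */
static inline void write_sysenter_cs(unsigned short sysenter_cs)
{
	wrmsr(MSR_IA32_SYSENTER_CS, sysenter_cs, 0);
}
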
@ -0,0 +1,238 @@
From 3868ecbc68a9713951f3008ef3af3b9da7e67e60 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:10 -0700
Subject: [PATCH 098/231] x86/entry/64: Pass SP0 directly to load_sp0()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

load_sp0() had an odd signature:

void load_sp0(struct tss_struct *tss, struct thread_struct *thread);

Simplify it to:

void load_sp0(unsigned long sp0);

Also simplify a few get_cpu()/put_cpu() sequences to
preempt_disable()/preempt_enable().

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2655d8b42ed940aa384fe18ee1129bbbcf730a08.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit da51da189a24bb9b7e2d5a123be096e51a4695a5)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 41f6a89b0be4d052a6af59df5e56102d4e4c79ef)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/paravirt.h | 5 ++---
arch/x86/include/asm/paravirt_types.h | 2 +-
arch/x86/include/asm/processor.h | 9 ++++-----
arch/x86/kernel/cpu/common.c | 4 ++--
arch/x86/kernel/process_32.c | 2 +-
arch/x86/kernel/process_64.c | 2 +-
arch/x86/kernel/vm86_32.c | 14 ++++++--------
arch/x86/xen/enlighten_pv.c | 7 +++----
8 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 12deec722cf0..43d4f90edebc 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -15,10 +15,9 @@
#include <linux/cpumask.h>
#include <asm/frame.h>

-static inline void load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
{
- PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
+ PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
}

/* The paravirtualized CPUID instruction. */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 42873edd9f9d..e3953a1e2b57 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -133,7 +133,7 @@ struct pv_cpu_ops {
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);

- void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
+ void (*load_sp0)(unsigned long sp0);

void (*set_iopl_mask)(unsigned mask);

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ee37fb86900a..85ddfc1a9bb5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -510,9 +510,9 @@ static inline void native_set_iopl_mask(unsigned mask)
}

static inline void
-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+native_load_sp0(unsigned long sp0)
{
- tss->x86_tss.sp0 = thread->sp0;
+ this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}

static inline void native_swapgs(void)
@@ -537,10 +537,9 @@ static inline unsigned long current_top_of_stack(void)
#else
#define __cpuid native_cpuid

-static inline void load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
{
- native_load_sp0(tss, thread);
+ native_load_sp0(sp0);
}

#define set_iopl_mask native_set_iopl_mask
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef7b1ba56363..6562acbfc4e0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1570,7 +1570,7 @@ void cpu_init(void)
BUG_ON(me->mm);
enter_lazy_tlb(&init_mm, me);

- load_sp0(t, &current->thread);
+ load_sp0(current->thread.sp0);
set_tss_desc(cpu, t);
load_TR_desc();
load_mm_ldt(&init_mm);
@@ -1624,7 +1624,7 @@ void cpu_init(void)
BUG_ON(curr->mm);
enter_lazy_tlb(&init_mm, curr);

- load_sp0(t, thread);
+ load_sp0(thread->sp0);
set_tss_desc(cpu, t);
load_TR_desc();
load_mm_ldt(&init_mm);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2e42b66b8ca4..48a3f240f565 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* current_thread_info(). Refresh the SYSENTER configuration in
* case prev or next is vm86.
*/
- load_sp0(tss, next);
+ load_sp0(next->sp0);
refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 565daaa6f18d..37b933628a8b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -466,7 +466,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(current_task, next_p);

/* Reload sp0. */
- load_sp0(tss, next);
+ load_sp0(next->sp0);

/*
* Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5bc1c3ab6287..0f1d92cd20ad 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -94,7 +94,6 @@

void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86plus_struct __user *user;
struct vm86 *vm86 = current->thread.vm86;
@@ -146,13 +145,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
do_exit(SIGSEGV);
}

- tss = &per_cpu(cpu_tss, get_cpu());
+ preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
- load_sp0(tss, &tsk->thread);
+ load_sp0(tsk->thread.sp0);
refresh_sysenter_cs(&tsk->thread);
vm86->saved_sp0 = 0;
- put_cpu();
+ preempt_enable();

memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));

@@ -238,7 +237,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)

static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86 *vm86 = tsk->thread.vm86;
struct kernel_vm86_regs vm86regs;
@@ -366,8 +364,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
vm86->saved_sp0 = tsk->thread.sp0;
lazy_save_gs(vm86->regs32.gs);

- tss = &per_cpu(cpu_tss, get_cpu());
/* make room for real-mode segments */
+ preempt_disable();
tsk->thread.sp0 += 16;

if (static_cpu_has(X86_FEATURE_SEP)) {
@@ -375,8 +373,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
refresh_sysenter_cs(&tsk->thread);
}

- load_sp0(tss, &tsk->thread);
- put_cpu();
+ load_sp0(tsk->thread.sp0);
+ preempt_enable();

if (vm86->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 8da4eff19c2a..e7b213047724 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -810,15 +810,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
}
}

-static void xen_load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static void xen_load_sp0(unsigned long sp0)
{
struct multicall_space mcs;

mcs = xen_mc_entry(0);
- MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
+ MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
- tss->x86_tss.sp0 = thread->sp0;
+ this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}

void xen_set_iopl_mask(unsigned mask)
--
2.14.2

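The reason the tss argument could disappear is visible in the processor.h hunk above: there is exactly one TSS per CPU, reachable through the cpu_tss per-CPU variable, so native_load_sp0() can write it directly instead of having every caller pass a pointer in. Restated as a standalone sketch (the body is taken from the hunk above; assume kernel per-CPU accessors are in scope):

/*
 * Sketch of the post-patch implementation: sp0 goes straight into
 * the per-CPU TSS, so no tss_struct pointer needs to be threaded
 * through callers.
 */
static inline void native_load_sp0(unsigned long sp0)
{
	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}
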
@ -0,0 +1,48 @@
From 181d224dabca9a9061a6955cf3d49a4eba7294bf Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:11 -0700
Subject: [PATCH 099/231] x86/entry: Add task_top_of_stack() to find the top of
a task's stack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This will let us get rid of a few places that hardcode accesses to
thread.sp0.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/b49b3f95a8ff858c40c9b0f5b32be0355324327d.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 3500130b84a3cdc5b6796eba1daf178944935efe)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit f1078e10e361afaeb22ee72c54d5ad397e19728d)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
arch/x86/include/asm/processor.h | 2 ++
1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 85ddfc1a9bb5..f83fbf1b6dd9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -788,6 +788,8 @@ static inline void spin_lock_prefetch(const void *x)
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)

+#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
+
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
--
2.14.2

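task_top_of_stack() leans on the convention that a task's saved pt_regs live at the very top of its kernel stack, so stepping one struct pt_regs past task_pt_regs(task) lands on the top-of-stack address itself (on 64-bit, where TOP_OF_KERNEL_STACK_PADDING is 0, that is the end of the stack allocation). A hedged sketch, with an invented wrapper name, assuming kernel context:

/*
 * Illustrative only: equivalent to the task_top_of_stack() macro added
 * above -- one struct pt_regs past the saved regs is the stack top.
 */
static unsigned long example_top_of_stack(struct task_struct *task)
{
	return (unsigned long)(task_pt_regs(task) + 1);
}
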
@ -0,0 +1,99 @@
From 232ab20b1af958a04a82fb7290a1e54c3632f771 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 2 Nov 2017 00:59:12 -0700
Subject: [PATCH 100/231] x86/xen/64, x86/entry/64: Clean up SP code in
 cpu_initialize_context()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

I'm removing thread_struct::sp0, and Xen's usage of it is slightly
dubious and unnecessary. Use appropriate helpers instead.

While we're at it, reorder the code slightly to make it more obvious
what's going on.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/d5b9a3da2b47c68325bd2bbe8f82d9554dee0d0f.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit f16b3da1dc936c0f8121741d0a1731bf242f2f56)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 27c60a1f6c49062151f67042458a523386cc3dc5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/xen/smp_pv.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 51471408fdd1..8c0e047d0b80 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -13,6 +13,7 @@
  * single-threaded.
  */
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -293,12 +294,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 #endif
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
+	/*
+	 * Bring up the CPU in cpu_bringup_and_idle() with the stack
+	 * pointing just below where pt_regs would be if it were a normal
+	 * kernel entry.
+	 */
 	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 	ctxt->flags = VGCF_IN_KERNEL;
 	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 	ctxt->user_regs.ds = __USER_DS;
 	ctxt->user_regs.es = __USER_DS;
 	ctxt->user_regs.ss = __KERNEL_DS;
+	ctxt->user_regs.cs = __KERNEL_CS;
+	ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
 
 	xen_copy_trap_info(ctxt->trap_ctxt);
 
@@ -313,8 +321,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->gdt_frames[0] = gdt_mfn;
 	ctxt->gdt_ents      = GDT_ENTRIES;
 
+	/*
+	 * Set SS:SP that Xen will use when entering guest kernel mode
+	 * from guest user mode.  Subsequent calls to load_sp0() can
+	 * change this value.
+	 */
 	ctxt->kernel_ss = __KERNEL_DS;
-	ctxt->kernel_sp = idle->thread.sp0;
+	ctxt->kernel_sp = task_top_of_stack(idle);
 
 #ifdef CONFIG_X86_32
 	ctxt->event_callback_cs     = __KERNEL_CS;
@@ -326,10 +339,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 		(unsigned long)xen_hypervisor_callback;
 	ctxt->failsafe_callback_eip =
 		(unsigned long)xen_failsafe_callback;
-	ctxt->user_regs.cs = __KERNEL_CS;
 	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
 
-	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
 	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
 		BUG();
--
2.14.2

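Aside: after this patch the two stack values Xen needs are derived from
helpers rather than thread.sp0: kernel_sp (the stack Xen loads on
user-to-kernel transitions) is the top of the idle task's stack, and
the bring-up SP sits exactly sizeof(struct pt_regs) below it, where
task_pt_regs() points. A minimal userspace sketch of that relationship;
every name with a _mock suffix is an illustrative stand-in:

	/* build with: cc -o xensp_sketch xensp_sketch.c */
	#include <stdio.h>

	struct pt_regs_mock { unsigned long slot[21]; };	/* mock register frame */

	static unsigned char idle_stack[8192];	/* mock kernel stack */

	static unsigned long task_top_of_stack_mock(void)
	{
		return (unsigned long)(idle_stack + sizeof(idle_stack));
	}

	static unsigned long task_pt_regs_mock(void)
	{
		return task_top_of_stack_mock() - sizeof(struct pt_regs_mock);
	}

	int main(void)
	{
		/* kernel_sp: stack Xen loads when entering guest kernel mode */
		unsigned long kernel_sp = task_top_of_stack_mock();

		/* bring-up SP: just below where pt_regs would sit after a
		 * normal kernel entry, as the comment in the hunk says */
		unsigned long bringup_sp = task_pt_regs_mock();

		printf("kernel_sp  = %#lx\n", kernel_sp);
		printf("bringup_sp = %#lx (kernel_sp - %zu)\n",
		       bringup_sp, sizeof(struct pt_regs_mock));
		return 0;
	}
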