195 lines
6.6 KiB
Diff
195 lines
6.6 KiB
Diff
From 534c2338c3f72069910c06eba7693a4a1d15faf8 Mon Sep 17 00:00:00 2001
|
|
From: Dave Hansen <dave.hansen@linux.intel.com>
|
|
Date: Mon, 4 Dec 2017 15:08:01 +0100
|
|
Subject: [PATCH 211/233] x86/mm: Use INVPCID for __native_flush_tlb_single()
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
CVE-2017-5754
|
|
|
|
This uses INVPCID to shoot down individual lines of the user mapping
|
|
instead of marking the entire user map as invalid. This
|
|
could/might/possibly be faster.
|
|
|
|
This for sure needs tlb_single_page_flush_ceiling to be redetermined;
|
|
esp. since INVPCID is _slow_.
|
|
|
|
A detailed performance analysis is available here:
|
|
|
|
https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com
|
|
|
|
[ Peterz: Split out from big combo patch ]
|
|
|
|
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
|
|
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
|
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
|
Cc: Andy Lutomirski <luto@kernel.org>
|
|
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
|
|
Cc: Borislav Petkov <bp@alien8.de>
|
|
Cc: Brian Gerst <brgerst@gmail.com>
|
|
Cc: Denys Vlasenko <dvlasenk@redhat.com>
|
|
Cc: Eduardo Valentin <eduval@amazon.com>
|
|
Cc: Greg KH <gregkh@linuxfoundation.org>
|
|
Cc: H. Peter Anvin <hpa@zytor.com>
|
|
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
|
|
Cc: Juergen Gross <jgross@suse.com>
|
|
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
|
Cc: Peter Zijlstra <peterz@infradead.org>
|
|
Cc: Will Deacon <will.deacon@arm.com>
|
|
Cc: aliguori@amazon.com
|
|
Cc: daniel.gruss@iaik.tugraz.at
|
|
Cc: hughd@google.com
|
|
Cc: keescook@google.com
|
|
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
|
(cherry picked from commit 6cff64b86aaaa07f89f50498055a20e45754b0c1)
|
|
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
|
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
|
(cherry picked from commit e4986a4e89c0eb40f824a8505feefff3328ad4b2)
|
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
---
|
|
arch/x86/include/asm/cpufeatures.h | 1 +
|
|
arch/x86/include/asm/tlbflush.h | 23 +++++++++++++-
|
|
arch/x86/mm/init.c | 64 ++++++++++++++++++++++----------------
|
|
3 files changed, 60 insertions(+), 28 deletions(-)
|
|
|
|
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
|
|
index de4e91452de4..9b0c283afcf0 100644
|
|
--- a/arch/x86/include/asm/cpufeatures.h
|
|
+++ b/arch/x86/include/asm/cpufeatures.h
|
|
@@ -196,6 +196,7 @@
|
|
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
|
|
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
|
|
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
|
|
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
|
|
|
|
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
|
|
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
|
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
|
|
index 2b7b32c243f1..979e590648a5 100644
|
|
--- a/arch/x86/include/asm/tlbflush.h
|
|
+++ b/arch/x86/include/asm/tlbflush.h
|
|
@@ -84,6 +84,18 @@ static inline u16 kern_pcid(u16 asid)
|
|
return asid + 1;
|
|
}
|
|
|
|
+/*
|
|
+ * The user PCID is just the kernel one, plus the "switch bit".
|
|
+ */
|
|
+static inline u16 user_pcid(u16 asid)
|
|
+{
|
|
+ u16 ret = kern_pcid(asid);
|
|
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
|
+ ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
|
|
+#endif
|
|
+ return ret;
|
|
+}
|
|
+
|
|
struct pgd_t;
|
|
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
|
|
{
|
|
@@ -324,6 +336,8 @@ static inline void __native_flush_tlb_global(void)
|
|
/*
|
|
* Using INVPCID is considerably faster than a pair of writes
|
|
* to CR4 sandwiched inside an IRQ flag save/restore.
|
|
+ *
|
|
+ * Note, this works with CR4.PCIDE=0 or 1.
|
|
*/
|
|
invpcid_flush_all();
|
|
return;
|
|
@@ -357,7 +371,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
|
|
if (!static_cpu_has(X86_FEATURE_PTI))
|
|
return;
|
|
|
|
- invalidate_user_asid(loaded_mm_asid);
|
|
+ /*
|
|
+ * Some platforms #GP if we call invpcid(type=0/1) before CR4.PCIDE=1.
|
|
+ * Just use invalidate_user_asid() in case we are called early.
|
|
+ */
|
|
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
|
|
+ invalidate_user_asid(loaded_mm_asid);
|
|
+ else
|
|
+ invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
|
|
}
|
|
|
|
/*
|
|
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
|
|
index caeb8a7bf0a4..80259ad8c386 100644
|
|
--- a/arch/x86/mm/init.c
|
|
+++ b/arch/x86/mm/init.c
|
|
@@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void)
|
|
|
|
static void setup_pcid(void)
|
|
{
|
|
-#ifdef CONFIG_X86_64
|
|
- if (boot_cpu_has(X86_FEATURE_PCID)) {
|
|
- if (boot_cpu_has(X86_FEATURE_PGE)) {
|
|
- /*
|
|
- * This can't be cr4_set_bits_and_update_boot() --
|
|
- * the trampoline code can't handle CR4.PCIDE and
|
|
- * it wouldn't do any good anyway. Despite the name,
|
|
- * cr4_set_bits_and_update_boot() doesn't actually
|
|
- * cause the bits in question to remain set all the
|
|
- * way through the secondary boot asm.
|
|
- *
|
|
- * Instead, we brute-force it and set CR4.PCIDE
|
|
- * manually in start_secondary().
|
|
- */
|
|
- cr4_set_bits(X86_CR4_PCIDE);
|
|
- } else {
|
|
- /*
|
|
- * flush_tlb_all(), as currently implemented, won't
|
|
- * work if PCID is on but PGE is not. Since that
|
|
- * combination doesn't exist on real hardware, there's
|
|
- * no reason to try to fully support it, but it's
|
|
- * polite to avoid corrupting data if we're on
|
|
- * an improperly configured VM.
|
|
- */
|
|
- setup_clear_cpu_cap(X86_FEATURE_PCID);
|
|
- }
|
|
+ if (!IS_ENABLED(CONFIG_X86_64))
|
|
+ return;
|
|
+
|
|
+ if (!boot_cpu_has(X86_FEATURE_PCID))
|
|
+ return;
|
|
+
|
|
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
|
|
+ /*
|
|
+ * This can't be cr4_set_bits_and_update_boot() -- the
|
|
+ * trampoline code can't handle CR4.PCIDE and it wouldn't
|
|
+ * do any good anyway. Despite the name,
|
|
+ * cr4_set_bits_and_update_boot() doesn't actually cause
|
|
+ * the bits in question to remain set all the way through
|
|
+ * the secondary boot asm.
|
|
+ *
|
|
+ * Instead, we brute-force it and set CR4.PCIDE manually in
|
|
+ * start_secondary().
|
|
+ */
|
|
+ cr4_set_bits(X86_CR4_PCIDE);
|
|
+
|
|
+ /*
|
|
+ * INVPCID's single-context modes (0/1) only work if we set
|
|
+ * X86_CR4_PCIDE, *and* we have INVPCID support. It's unusable
|
|
+ * on systems that have X86_CR4_PCIDE clear, or that have
|
|
+ * no INVPCID support at all.
|
|
+ */
|
|
+ if (boot_cpu_has(X86_FEATURE_INVPCID))
|
|
+ setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
|
|
+ } else {
|
|
+ /*
|
|
+ * flush_tlb_all(), as currently implemented, won't work if
|
|
+ * PCID is on but PGE is not. Since that combination
|
|
+ * doesn't exist on real hardware, there's no reason to try
|
|
+ * to fully support it, but it's polite to avoid corrupting
|
|
+ * data if we're on an improperly configured VM.
|
|
+ */
|
|
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
|
|
}
|
|
-#endif
|
|
}
|
|
|
|
#ifdef CONFIG_X86_32
|
|
--
|
|
2.14.2
|
|
|