2018-01-06 17:13:39 +03:00
|
|
|
From ddb5e7b381d37d0f8bca61f0b761ae5c3a2f5ee0 Mon Sep 17 00:00:00 2001
|
|
|
|
From: Andy Lutomirski <luto@kernel.org>
|
|
|
|
Date: Sun, 17 Sep 2017 09:03:48 -0700
|
2018-01-07 15:17:59 +03:00
|
|
|
Subject: [PATCH 043/233] x86/mm: Factor out CR3-building code
|
2018-01-06 17:13:39 +03:00
|
|
|
MIME-Version: 1.0
|
|
|
|
Content-Type: text/plain; charset=UTF-8
|
|
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
|
|
|
|
CVE-2017-5754
|
|
|
|
|
|
|
|
Currently, the code that assembles a value to load into CR3 is
|
|
|
|
open-coded everywhere. Factor it out into helpers build_cr3() and
|
|
|
|
build_cr3_noflush().
|
|
|
|
|
|
|
|
This makes one semantic change: __get_current_cr3_fast() was wrong
|
|
|
|
on SME systems. No one noticed because the only caller is in the
|
|
|
|
VMX code, and there are no CPUs with both SME and VMX.
|
|
|
|
|
|
|
|
Signed-off-by: Andy Lutomirski <luto@kernel.org>
|
|
|
|
Cc: Borislav Petkov <bpetkov@suse.de>
|
|
|
|
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
|
|
|
Cc: Peter Zijlstra <peterz@infradead.org>
|
|
|
|
Cc: Thomas Gleixner <tglx@linutronix.de>
|
|
|
|
Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
|
|
|
|
Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
|
|
|
|
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
|
|
|
(backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
|
|
|
|
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
|
|
|
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
|
|
|
|
(cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
|
|
|
|
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
|
|
|
---
|
|
|
|
arch/x86/include/asm/mmu_context.h | 15 ++++++---
|
|
|
|
arch/x86/mm/tlb.c | 68 +++++++++++++++++++++++++++++++++++---
|
|
|
|
2 files changed, 75 insertions(+), 8 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
|
|
|
|
index 7ae318c340d9..a999ba6b721f 100644
|
|
|
|
--- a/arch/x86/include/asm/mmu_context.h
|
|
|
|
+++ b/arch/x86/include/asm/mmu_context.h
|
|
|
|
@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|
|
|
return __pkru_allows_pkey(vma_pkey(vma), write);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
|
|
|
|
+{
|
|
|
|
+ return __sme_pa(mm->pgd) | asid;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
|
|
|
|
+{
|
|
|
|
+ return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
|
|
|
|
+}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This can be used from process context to figure out what the value of
|
|
|
|
@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|
|
|
*/
|
|
|
|
static inline unsigned long __get_current_cr3_fast(void)
|
|
|
|
{
|
|
|
|
- unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
|
|
|
|
-
|
|
|
|
- if (static_cpu_has(X86_FEATURE_PCID))
|
|
|
|
- cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
|
|
|
|
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
|
|
|
|
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid));
|
|
|
|
|
|
|
|
/* For now, be very restrictive about when this can be called. */
|
|
|
|
VM_WARN_ON(in_nmi() || preemptible());
|
|
|
|
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
|
|
|
|
index 57943b4d8f2e..440400316c8a 100644
|
|
|
|
--- a/arch/x86/mm/tlb.c
|
|
|
|
+++ b/arch/x86/mm/tlb.c
|
|
|
|
@@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
|
|
|
* without going through leave_mm() / switch_mm_irqs_off() or that
|
|
|
|
* does something like write_cr3(read_cr3_pa()).
|
|
|
|
*/
|
|
|
|
- VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
|
|
|
|
+#ifdef CONFIG_DEBUG_VM
|
|
|
|
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
|
|
|
|
+ /*
|
|
|
|
+ * If we were to BUG here, we'd be very likely to kill
|
|
|
|
+ * the system so hard that we don't see the call trace.
|
|
|
|
+ * Try to recover instead by ignoring the error and doing
|
|
|
|
+ * a global flush to minimize the chance of corruption.
|
|
|
|
+ *
|
|
|
|
+ * (This is far from being a fully correct recovery.
|
|
|
|
+ * Architecturally, the CPU could prefetch something
|
|
|
|
+ * back into an incorrect ASID slot and leave it there
|
|
|
|
+ * to cause trouble down the road. It's better than
|
|
|
|
+ * nothing, though.)
|
|
|
|
+ */
|
|
|
|
+ __flush_tlb_all();
|
|
|
|
+ }
|
|
|
|
+#endif
|
|
|
|
|
|
|
|
if (real_prev == next) {
|
|
|
|
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
|
|
|
|
@@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
|
|
|
*/
|
|
|
|
this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
|
|
|
|
next_tlb_gen);
|
|
|
|
- write_cr3(__pa(next->pgd) | prev_asid);
|
|
|
|
+ write_cr3(build_cr3(next, prev_asid));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This gets called via leave_mm() in the idle path
|
|
|
|
@@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
|
|
|
if (need_flush) {
|
|
|
|
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
|
|
|
|
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
|
|
|
|
- write_cr3(__pa(next->pgd) | new_asid);
|
|
|
|
+ write_cr3(build_cr3(next, new_asid));
|
|
|
|
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
|
|
|
|
TLB_FLUSH_ALL);
|
|
|
|
} else {
|
|
|
|
/* The new ASID is already up to date. */
|
|
|
|
- write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
|
|
|
|
+ write_cr3(build_cr3_noflush(next, new_asid));
|
|
|
|
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
@@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
|
|
|
switch_ldt(real_prev, next);
|
|
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Call this when reinitializing a CPU. It fixes the following potential
|
|
|
|
+ * problems:
|
|
|
|
+ *
|
|
|
|
+ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
|
|
|
|
+ * because the CPU was taken down and came back up with CR3's PCID
|
|
|
|
+ * bits clear. CPU hotplug can do this.
|
|
|
|
+ *
|
|
|
|
+ * - The TLB contains junk in slots corresponding to inactive ASIDs.
|
|
|
|
+ *
|
|
|
|
+ * - The CPU went so far out to lunch that it may have missed a TLB
|
|
|
|
+ * flush.
|
|
|
|
+ */
|
|
|
|
+void initialize_tlbstate_and_flush(void)
|
|
|
|
+{
|
|
|
|
+ int i;
|
|
|
|
+ struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
|
|
|
|
+ u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
|
|
|
|
+ unsigned long cr3 = __read_cr3();
|
|
|
|
+
|
|
|
|
+ /* Assert that CR3 already references the right mm. */
|
|
|
|
+ WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
|
|
|
|
+ * doesn't work like other CR4 bits because it can only be set from
|
|
|
|
+ * long mode.)
|
|
|
|
+ */
|
|
|
|
+ WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
|
|
|
|
+ !(cr4_read_shadow() & X86_CR4_PCIDE));
|
|
|
|
+
|
|
|
|
+ /* Force ASID 0 and force a TLB flush. */
|
|
|
|
+ write_cr3(build_cr3(mm, 0));
|
|
|
|
+
|
|
|
|
+ /* Reinitialize tlbstate. */
|
|
|
|
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
|
|
|
|
+ this_cpu_write(cpu_tlbstate.next_asid, 1);
|
|
|
|
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
|
|
|
|
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
|
|
|
|
+
|
|
|
|
+ for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
|
|
|
|
+ this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* flush_tlb_func_common()'s memory ordering requirement is that any
|
|
|
|
* TLB fills that happen after we flush the TLB are ordered after we
|
|
|
|
--
|
|
|
|
2.14.2
|
|
|
|
|