From ddb5e7b381d37d0f8bca61f0b761ae5c3a2f5ee0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 17 Sep 2017 09:03:48 -0700 Subject: [PATCH 043/233] x86/mm: Factor out CR3-building code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CVE-2017-5754 Current, the code that assembles a value to load into CR3 is open-coded everywhere. Factor it out into helpers build_cr3() and build_cr3_noflush(). This makes one semantic change: __get_current_cr3_fast() was wrong on SME systems. No one noticed because the only caller is in the VMX code, and there are no CPUs with both SME and VMX. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org Signed-off-by: Ingo Molnar (backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f) Signed-off-by: Andy Whitcroft Signed-off-by: Kleber Sacilotto de Souza (cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8) Signed-off-by: Fabian Grünbichler --- arch/x86/include/asm/mmu_context.h | 15 ++++++--- arch/x86/mm/tlb.c | 68 +++++++++++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 7ae318c340d9..a999ba6b721f 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } +static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid) +{ + return __sme_pa(mm->pgd) | asid; +} + +static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) +{ + return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH; +} /* * This can be used from process context to figure out what the value of @@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, */ static inline unsigned long __get_current_cr3_fast(void) { - unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd); - - if (static_cpu_has(X86_FEATURE_PCID)) - cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid); + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), + this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* For now, be very restrictive about when this can be called. */ VM_WARN_ON(in_nmi() || preemptible()); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 57943b4d8f2e..440400316c8a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * without going through leave_mm() / switch_mm_irqs_off() or that * does something like write_cr3(read_cr3_pa()). */ - VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid)); +#ifdef CONFIG_DEBUG_VM + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { + /* + * If we were to BUG here, we'd be very likely to kill + * the system so hard that we don't see the call trace. + * Try to recover instead by ignoring the error and doing + * a global flush to minimize the chance of corruption. + * + * (This is far from being a fully correct recovery. + * Architecturally, the CPU could prefetch something + * back into an incorrect ASID slot and leave it there + * to cause trouble down the road. It's better than + * nothing, though.) + */ + __flush_tlb_all(); + } +#endif if (real_prev == next) { VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != @@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, */ this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen, next_tlb_gen); - write_cr3(__pa(next->pgd) | prev_asid); + write_cr3(build_cr3(next, prev_asid)); /* * This gets called via leave_mm() in the idle path @@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(__pa(next->pgd) | new_asid); + write_cr3(build_cr3(next, new_asid)); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH); + write_cr3(build_cr3_noflush(next, new_asid)); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); } @@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, switch_ldt(real_prev, next); } +/* + * Call this when reinitializing a CPU. It fixes the following potential + * problems: + * + * - The ASID changed from what cpu_tlbstate thinks it is (most likely + * because the CPU was taken down and came back up with CR3's PCID + * bits clear. CPU hotplug can do this. + * + * - The TLB contains junk in slots corresponding to inactive ASIDs. + * + * - The CPU went so far out to lunch that it may have missed a TLB + * flush. + */ +void initialize_tlbstate_and_flush(void) +{ + int i; + struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm); + u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen); + unsigned long cr3 = __read_cr3(); + + /* Assert that CR3 already references the right mm. */ + WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd)); + + /* + * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization + * doesn't work like other CR4 bits because it can only be set from + * long mode.) + */ + WARN_ON(boot_cpu_has(X86_FEATURE_PCID) && + !(cr4_read_shadow() & X86_CR4_PCIDE)); + + /* Force ASID 0 and force a TLB flush. */ + write_cr3(build_cr3(mm, 0)); + + /* Reinitialize tlbstate. */ + this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); + this_cpu_write(cpu_tlbstate.next_asid, 1); + this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); + this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen); + + for (i = 1; i < TLB_NR_DYN_ASIDS; i++) + this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0); +} + /* * flush_tlb_func_common()'s memory ordering requirement is that any * TLB fills that happen after we flush the TLB are ordered after we -- 2.14.2