diff --git a/patches/kernel/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch b/patches/kernel/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch index 6d9941e..b98c824 100644 --- a/patches/kernel/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch +++ b/patches/kernel/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch @@ -1,7 +1,7 @@ -From 3e7aff6b22287eb04b8b5c4966b886b0ac1e8b06 Mon Sep 17 00:00:00 2001 +From 9d8a1db451ee4bcf6a6b7cb99facc922e67ef968 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 May 2015 19:29:22 +0100 -Subject: [PATCH 1/6] Make mkcompile_h accept an alternate timestamp string +Subject: [PATCH 1/7] Make mkcompile_h accept an alternate timestamp string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -42,5 +42,5 @@ index fd8fdb91581d..1e35ac9fc810 100755 if test -z "$KBUILD_BUILD_USER"; then LINUX_COMPILE_BY=$(whoami | sed 's/\\/\\\\/') -- -2.11.0 +2.14.2 diff --git a/patches/kernel/0002-bridge-keep-MAC-of-first-assigned-port.patch b/patches/kernel/0002-bridge-keep-MAC-of-first-assigned-port.patch index a1b62bf..92f2703 100644 --- a/patches/kernel/0002-bridge-keep-MAC-of-first-assigned-port.patch +++ b/patches/kernel/0002-bridge-keep-MAC-of-first-assigned-port.patch @@ -1,7 +1,7 @@ -From 400ed0a6a0992f685b547e12f0fd5d35176e1275 Mon Sep 17 00:00:00 2001 +From a940827b3da8eeb851fc6a6e620f1867a55900d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= Date: Thu, 14 Sep 2017 11:02:18 +0200 -Subject: [PATCH 2/6] bridge: keep MAC of first assigned port +Subject: [PATCH 2/7] bridge: keep MAC of first assigned port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -34,5 +34,5 @@ index 89110319ef0f..5e73fff65f47 100644 if (ether_addr_equal(br->bridge_id.addr, addr)) -- -2.11.0 +2.14.2 diff --git a/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch b/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch index 7084ff3..1eed7de 100644 --- a/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch +++ b/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch @@ -1,7 +1,7 @@ -From e39d70502f7a981f821243160501303b7ee26d52 Mon Sep 17 00:00:00 2001 +From 218bc45e33a23e8d9460389d9894a3acaa5e3bff Mon Sep 17 00:00:00 2001 From: Mark Weiman Date: Sat, 29 Jul 2017 09:15:32 -0400 -Subject: [PATCH 3/6] pci: Enable overrides for missing ACS capabilities +Subject: [PATCH 3/7] pci: Enable overrides for missing ACS capabilities (4.12+) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 @@ -55,7 +55,7 @@ Signed-off-by: Fabian Grünbichler 2 files changed, 111 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index f8ce2089146c..d4ace16bd59a 100644 +index 7441c67d1d8e..73fd6abac39b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2918,6 +2918,15 @@ @@ -75,7 +75,7 @@ index f8ce2089146c..d4ace16bd59a 100644 Safety option to keep boot IRQs enabled. This should never be necessary. 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index b7832fd4dbf4..f128ca4aea2b 100644 +index 02b009426670..c29d89ffc9b2 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3687,6 +3687,107 @@ static int __init pci_apply_final_quirks(void) @@ -186,7 +186,7 @@ index b7832fd4dbf4..f128ca4aea2b 100644 /* * Following are device-specific reset methods which can be used to * reset a single function if other methods (e.g. FLR, PM D0->D3) are -@@ -4490,6 +4591,7 @@ static const struct pci_dev_acs_enabled { +@@ -4514,6 +4615,7 @@ static const struct pci_dev_acs_enabled { { 0x10df, 0x720, pci_quirk_mf_endpoint_acs }, /* Emulex Skyhawk-R */ /* Cavium ThunderX */ { PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, pci_quirk_cavium_acs }, @@ -195,5 +195,5 @@ index b7832fd4dbf4..f128ca4aea2b 100644 }; -- -2.11.0 +2.14.2 diff --git a/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch b/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch index 0340025..3ace153 100644 --- a/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch +++ b/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch @@ -1,7 +1,7 @@ -From c60cb6add5a95c3e7bfb7b0d92c01ce66bfec8d6 Mon Sep 17 00:00:00 2001 +From 271e69f077ebaa56dc8953e679de08a7005d3ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= Date: Thu, 14 Sep 2017 11:09:58 +0200 -Subject: [PATCH 4/6] kvm: disable default dynamic halt polling growth +Subject: [PATCH 4/7] kvm: disable default dynamic halt polling growth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -25,5 +25,5 @@ index 4d81f6ded88e..bfa9c4d34102 100644 EXPORT_SYMBOL_GPL(halt_poll_ns_grow); -- -2.11.0 +2.14.2 diff --git a/patches/kernel/0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch b/patches/kernel/0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch index e19261a..40b5a24 100644 --- a/patches/kernel/0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch +++ b/patches/kernel/0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch @@ -1,7 +1,7 @@ -From 014fff8b14ccd85d8e9604bbf4e812784bcfc29b Mon Sep 17 00:00:00 2001 +From c3827fb6d1d80856ca9107758e07ba677bc0d43b Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 17 Aug 2017 15:33:09 -0400 -Subject: [PATCH 5/6] cgroup: Add mount flag to enable cpuset to use v2 +Subject: [PATCH 5/7] cgroup: Add mount flag to enable cpuset to use v2 behavior in v1 cgroup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 @@ -62,5 +62,5 @@ index 7bf4b1533f34..ce7426b875f5 100644 opts->flags |= CGRP_ROOT_XATTR; continue; -- -2.11.0 +2.14.2 diff --git a/patches/kernel/0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch b/patches/kernel/0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch index 91359d5..9886905 100644 --- a/patches/kernel/0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch +++ b/patches/kernel/0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch @@ -1,7 +1,7 @@ -From eef40f89ccf3fc7ef5b1f88a4a6974fa7667f74f Mon Sep 17 00:00:00 2001 +From 464d0b2e2601e16236e25bfc3113ce717f7a5d02 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 17 Aug 2017 15:33:10 -0400 -Subject: [PATCH 6/6] cpuset: Allow v2 behavior in v1 cgroup +Subject: [PATCH 6/7] cpuset: Allow v2 behavior in v1 cgroup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -23,13 +23,14 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 20 insertions(+), 13 deletions(-) diff 
--git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 87a1213dd326..9b2c4babbd7f 100644 +index e8cb34193433..f76c4bf3d46a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c -@@ -300,6 +300,16 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); +@@ -299,6 +299,16 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); + static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); - /* ++/* + * Cgroup v2 behavior is used when on default hierarchy or the + * cgroup_v2_mode flag is set. + */ @@ -39,10 +40,9 @@ index 87a1213dd326..9b2c4babbd7f 100644 + (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); +} + -+/* + /* * This is ugly, but preserves the userspace API for existing cpuset * users. If someone tries to mount the "cpuset" filesystem, we - * silently switch it to mount "cgroup" instead @@ -489,8 +499,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) /* On legacy hiearchy, we must be a subset of our parent cpuset. */ @@ -127,7 +127,7 @@ index 87a1213dd326..9b2c4babbd7f 100644 hotplug_update_tasks(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); else -@@ -2281,7 +2288,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) +@@ -2288,7 +2295,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; @@ -137,5 +137,5 @@ index 87a1213dd326..9b2c4babbd7f 100644 mutex_lock(&cpuset_mutex); -- -2.11.0 +2.14.2 diff --git a/patches/kernel/0007-Revert-Merge-branch-mmu_notifier_fixes.patch b/patches/kernel/0007-Revert-Merge-branch-mmu_notifier_fixes.patch new file mode 100644 index 0000000..0608d3a --- /dev/null +++ b/patches/kernel/0007-Revert-Merge-branch-mmu_notifier_fixes.patch @@ -0,0 +1,825 @@ +From dfd4ec1fd8d1d09930e9cf9ed7ebd07a66813337 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= +Date: Wed, 29 Nov 2017 09:45:44 +0100 +Subject: [PATCH 7/7] Revert "Merge branch 'mmu_notifier_fixes'" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This reverts commit ea25c43179462e342d4a0e66c3f6a5f53514da05, reversing +changes made to c227390c91a355300f47f9bef0aefbdfaaca1500. + +This series causes blue screens in Windows VMs running under heavy +memory/swap pressure. 
+ +Signed-off-by: Fabian Grünbichler +--- + arch/arm/include/asm/kvm_host.h | 6 +++++ + arch/arm64/include/asm/kvm_host.h | 6 +++++ + arch/mips/include/asm/kvm_host.h | 5 ++++ + arch/powerpc/include/asm/kvm_host.h | 5 ++++ + arch/x86/include/asm/kvm_host.h | 2 ++ + include/linux/mm.h | 1 - + include/linux/mmu_notifier.h | 25 +++++++++++++++++++ + arch/powerpc/platforms/powernv/npu-dma.c | 10 ++++++++ + arch/x86/kvm/x86.c | 11 +++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 31 +++++++++++++++++++++++ + drivers/infiniband/core/umem_odp.c | 19 +++++++++++++++ + drivers/infiniband/hw/hfi1/mmu_rb.c | 9 +++++++ + drivers/iommu/amd_iommu_v2.c | 8 ++++++ + drivers/iommu/intel-svm.c | 9 +++++++ + drivers/misc/mic/scif/scif_dma.c | 11 +++++++++ + drivers/misc/sgi-gru/grutlbpurge.c | 12 +++++++++ + drivers/xen/gntdev.c | 8 ++++++ + fs/dax.c | 19 ++++++--------- + mm/memory.c | 26 ++++---------------- + mm/mmu_notifier.c | 14 +++++++++++ + mm/rmap.c | 35 +++----------------------- + virt/kvm/kvm_main.c | 42 ++++++++++++++++++++++++++++++++ + 22 files changed, 249 insertions(+), 65 deletions(-) + +diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h +index 4a879f6ff13b..127e2dd2e21c 100644 +--- a/arch/arm/include/asm/kvm_host.h ++++ b/arch/arm/include/asm/kvm_host.h +@@ -225,6 +225,12 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); + int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); + int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); + ++/* We do not have shadow page tables, hence the empty hooks */ ++static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, ++ unsigned long address) ++{ ++} ++ + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); + struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); + void kvm_arm_halt_guest(struct kvm *kvm); +diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h +index e923b58606e2..d68630007b14 100644 +--- a/arch/arm64/include/asm/kvm_host.h ++++ b/arch/arm64/include/asm/kvm_host.h +@@ -326,6 +326,12 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); + int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); + ++/* We do not have shadow page tables, hence the empty hooks */ ++static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, ++ unsigned long address) ++{ ++} ++ + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); + struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); + void kvm_arm_halt_guest(struct kvm *kvm); +diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h +index a9af1d2dcd69..2998479fd4e8 100644 +--- a/arch/mips/include/asm/kvm_host.h ++++ b/arch/mips/include/asm/kvm_host.h +@@ -938,6 +938,11 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); + int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); + ++static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, ++ unsigned long address) ++{ ++} ++ + /* Emulation */ + int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); + enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause); +diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h +index e372ed871c51..8b3f1238d07f 100644 +--- a/arch/powerpc/include/asm/kvm_host.h ++++ b/arch/powerpc/include/asm/kvm_host.h +@@ 
-67,6 +67,11 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); + extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); + extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + ++static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, ++ unsigned long address) ++{ ++} ++ + #define HPTEG_CACHE_NUM (1 << 15) + #define HPTEG_HASH_BITS_PTE 13 + #define HPTEG_HASH_BITS_PTE_LONG 12 +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 92c9032502d8..f4d120a3e22e 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1375,6 +1375,8 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); + int kvm_cpu_get_interrupt(struct kvm_vcpu *v); + void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); + void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); ++void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, ++ unsigned long address); + + void kvm_define_shared_msr(unsigned index, u32 msr); + int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 07630442bbf2..701de4b55ece 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1260,7 +1260,6 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, + void unmap_mapping_range(struct address_space *mapping, + loff_t const holebegin, loff_t const holelen, int even_cows); + int follow_pte_pmd(struct mm_struct *mm, unsigned long address, +- unsigned long *start, unsigned long *end, + pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); + int follow_pfn(struct vm_area_struct *vma, unsigned long address, + unsigned long *pfn); +diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h +index 6866e8126982..947f21b451d2 100644 +--- a/include/linux/mmu_notifier.h ++++ b/include/linux/mmu_notifier.h +@@ -94,6 +94,17 @@ struct mmu_notifier_ops { + unsigned long address, + pte_t pte); + ++ /* ++ * Before this is invoked any secondary MMU is still ok to ++ * read/write to the page previously pointed to by the Linux ++ * pte because the page hasn't been freed yet and it won't be ++ * freed until this returns. If required set_page_dirty has to ++ * be called internally to this method. 
++ */ ++ void (*invalidate_page)(struct mmu_notifier *mn, ++ struct mm_struct *mm, ++ unsigned long address); ++ + /* + * invalidate_range_start() and invalidate_range_end() must be + * paired and are called only when the mmap_sem and/or the +@@ -209,6 +220,8 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm, + unsigned long address); + extern void __mmu_notifier_change_pte(struct mm_struct *mm, + unsigned long address, pte_t pte); ++extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, ++ unsigned long address); + extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end); + extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, +@@ -255,6 +268,13 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm, + __mmu_notifier_change_pte(mm, address, pte); + } + ++static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, ++ unsigned long address) ++{ ++ if (mm_has_notifiers(mm)) ++ __mmu_notifier_invalidate_page(mm, address); ++} ++ + static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) + { +@@ -427,6 +447,11 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm, + { + } + ++static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, ++ unsigned long address) ++{ ++} ++ + static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) + { +diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c +index 2cb6cbea4b3b..3d4f879e687c 100644 +--- a/arch/powerpc/platforms/powernv/npu-dma.c ++++ b/arch/powerpc/platforms/powernv/npu-dma.c +@@ -614,6 +614,15 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, + mmio_invalidate(npu_context, 1, address, true); + } + ++static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, ++ struct mm_struct *mm, ++ unsigned long address) ++{ ++ struct npu_context *npu_context = mn_to_npu_context(mn); ++ ++ mmio_invalidate(npu_context, 1, address, true); ++} ++ + static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +@@ -631,6 +640,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, + static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { + .release = pnv_npu2_mn_release, + .change_pte = pnv_npu2_mn_change_pte, ++ .invalidate_page = pnv_npu2_mn_invalidate_page, + .invalidate_range = pnv_npu2_mn_invalidate_range, + }; + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 7351cdc46cc7..a669b4dd51e7 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -6734,6 +6734,17 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) + } + EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); + ++void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, ++ unsigned long address) ++{ ++ /* ++ * The physical address of apic access page is stored in the VMCS. ++ * Update it when it becomes invalid. ++ */ ++ if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT)) ++ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); ++} ++ + /* + * Returns 1 to let vcpu_run() continue the guest execution loop without + * exiting to the userspace. 
Otherwise, the value will be returned to the +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +index e1cde6b80027..6558a3ed57a7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +@@ -146,6 +146,36 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, + } + } + ++/** ++ * amdgpu_mn_invalidate_page - callback to notify about mm change ++ * ++ * @mn: our notifier ++ * @mn: the mm this callback is about ++ * @address: address of invalidate page ++ * ++ * Invalidation of a single page. Blocks for all BOs mapping it ++ * and unmap them by move them into system domain again. ++ */ ++static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn, ++ struct mm_struct *mm, ++ unsigned long address) ++{ ++ struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); ++ struct interval_tree_node *it; ++ ++ mutex_lock(&rmn->lock); ++ ++ it = interval_tree_iter_first(&rmn->objects, address, address); ++ if (it) { ++ struct amdgpu_mn_node *node; ++ ++ node = container_of(it, struct amdgpu_mn_node, it); ++ amdgpu_mn_invalidate_node(node, address, address); ++ } ++ ++ mutex_unlock(&rmn->lock); ++} ++ + /** + * amdgpu_mn_invalidate_range_start - callback to notify about mm change + * +@@ -185,6 +215,7 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, + + static const struct mmu_notifier_ops amdgpu_mn_ops = { + .release = amdgpu_mn_release, ++ .invalidate_page = amdgpu_mn_invalidate_page, + .invalidate_range_start = amdgpu_mn_invalidate_range_start, + }; + +diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c +index 55e8f5ed8b3c..8c4ec564e495 100644 +--- a/drivers/infiniband/core/umem_odp.c ++++ b/drivers/infiniband/core/umem_odp.c +@@ -166,6 +166,24 @@ static int invalidate_page_trampoline(struct ib_umem *item, u64 start, + return 0; + } + ++static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn, ++ struct mm_struct *mm, ++ unsigned long address) ++{ ++ struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); ++ ++ if (!context->invalidate_range) ++ return; ++ ++ ib_ucontext_notifier_start_account(context); ++ down_read(&context->umem_rwsem); ++ rbt_ib_umem_for_each_in_range(&context->umem_tree, address, ++ address + PAGE_SIZE, ++ invalidate_page_trampoline, NULL); ++ up_read(&context->umem_rwsem); ++ ib_ucontext_notifier_end_account(context); ++} ++ + static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start, + u64 end, void *cookie) + { +@@ -219,6 +237,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, + + static const struct mmu_notifier_ops ib_umem_notifiers = { + .release = ib_umem_notifier_release, ++ .invalidate_page = ib_umem_notifier_invalidate_page, + .invalidate_range_start = ib_umem_notifier_invalidate_range_start, + .invalidate_range_end = ib_umem_notifier_invalidate_range_end, + }; +diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c +index e4b56a0dd6d0..ccbf52c8ff6f 100644 +--- a/drivers/infiniband/hw/hfi1/mmu_rb.c ++++ b/drivers/infiniband/hw/hfi1/mmu_rb.c +@@ -67,6 +67,8 @@ struct mmu_rb_handler { + + static unsigned long mmu_node_start(struct mmu_rb_node *); + static unsigned long mmu_node_last(struct mmu_rb_node *); ++static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, ++ unsigned long); + static inline void mmu_notifier_range_start(struct mmu_notifier *, + struct mm_struct *, + unsigned 
long, unsigned long); +@@ -80,6 +82,7 @@ static void do_remove(struct mmu_rb_handler *handler, + static void handle_remove(struct work_struct *work); + + static const struct mmu_notifier_ops mn_opts = { ++ .invalidate_page = mmu_notifier_page, + .invalidate_range_start = mmu_notifier_range_start, + }; + +@@ -282,6 +285,12 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, + handler->ops->remove(handler->ops_arg, node); + } + ++static inline void mmu_notifier_page(struct mmu_notifier *mn, ++ struct mm_struct *mm, unsigned long addr) ++{ ++ mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE); ++} ++ + static inline void mmu_notifier_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, +diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c +index dccf5b76eff2..6629c472eafd 100644 +--- a/drivers/iommu/amd_iommu_v2.c ++++ b/drivers/iommu/amd_iommu_v2.c +@@ -391,6 +391,13 @@ static int mn_clear_flush_young(struct mmu_notifier *mn, + return 0; + } + ++static void mn_invalidate_page(struct mmu_notifier *mn, ++ struct mm_struct *mm, ++ unsigned long address) ++{ ++ __mn_flush_page(mn, address); ++} ++ + static void mn_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +@@ -429,6 +436,7 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) + static const struct mmu_notifier_ops iommu_mn = { + .release = mn_release, + .clear_flush_young = mn_clear_flush_young, ++ .invalidate_page = mn_invalidate_page, + .invalidate_range = mn_invalidate_range, + }; + +diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c +index f620dccec8ee..f167c0d84ebf 100644 +--- a/drivers/iommu/intel-svm.c ++++ b/drivers/iommu/intel-svm.c +@@ -223,6 +223,14 @@ static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, + intel_flush_svm_range(svm, address, 1, 1, 0); + } + ++static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, ++ unsigned long address) ++{ ++ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); ++ ++ intel_flush_svm_range(svm, address, 1, 1, 0); ++} ++ + /* Pages have been freed at this point */ + static void intel_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, +@@ -277,6 +285,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) + static const struct mmu_notifier_ops intel_mmuops = { + .release = intel_mm_release, + .change_pte = intel_change_pte, ++ .invalidate_page = intel_invalidate_page, + .invalidate_range = intel_invalidate_range, + }; + +diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c +index 63d6246d6dff..64d5760d069a 100644 +--- a/drivers/misc/mic/scif/scif_dma.c ++++ b/drivers/misc/mic/scif/scif_dma.c +@@ -200,6 +200,16 @@ static void scif_mmu_notifier_release(struct mmu_notifier *mn, + schedule_work(&scif_info.misc_work); + } + ++static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn, ++ struct mm_struct *mm, ++ unsigned long address) ++{ ++ struct scif_mmu_notif *mmn; ++ ++ mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); ++ scif_rma_destroy_tcw(mmn, address, PAGE_SIZE); ++} ++ + static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, +@@ -225,6 +235,7 @@ static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + static const struct mmu_notifier_ops scif_mmu_notifier_ops = { + .release = 
scif_mmu_notifier_release, + .clear_flush_young = NULL, ++ .invalidate_page = scif_mmu_notifier_invalidate_page, + .invalidate_range_start = scif_mmu_notifier_invalidate_range_start, + .invalidate_range_end = scif_mmu_notifier_invalidate_range_end}; + +diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c +index 9918eda0e05f..e936d43895d2 100644 +--- a/drivers/misc/sgi-gru/grutlbpurge.c ++++ b/drivers/misc/sgi-gru/grutlbpurge.c +@@ -247,6 +247,17 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn, + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end); + } + ++static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, ++ unsigned long address) ++{ ++ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, ++ ms_notifier); ++ ++ STAT(mmu_invalidate_page); ++ gru_flush_tlb_range(gms, address, PAGE_SIZE); ++ gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address); ++} ++ + static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) + { + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, +@@ -258,6 +269,7 @@ static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) + + + static const struct mmu_notifier_ops gru_mmuops = { ++ .invalidate_page = gru_invalidate_page, + .invalidate_range_start = gru_invalidate_range_start, + .invalidate_range_end = gru_invalidate_range_end, + .release = gru_release, +diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c +index 82360594fa8e..f3bf8f4e2d6c 100644 +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -484,6 +484,13 @@ static void mn_invl_range_start(struct mmu_notifier *mn, + mutex_unlock(&priv->lock); + } + ++static void mn_invl_page(struct mmu_notifier *mn, ++ struct mm_struct *mm, ++ unsigned long address) ++{ ++ mn_invl_range_start(mn, mm, address, address + PAGE_SIZE); ++} ++ + static void mn_release(struct mmu_notifier *mn, + struct mm_struct *mm) + { +@@ -515,6 +522,7 @@ static void mn_release(struct mmu_notifier *mn, + + static const struct mmu_notifier_ops gntdev_mmu_ops = { + .release = mn_release, ++ .invalidate_page = mn_invl_page, + .invalidate_range_start = mn_invl_range_start, + }; + +diff --git a/fs/dax.c b/fs/dax.c +index fa8e358c3c6b..57da1d0a6a40 100644 +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -591,10 +591,11 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, + pte_t pte, *ptep = NULL; + pmd_t *pmdp = NULL; + spinlock_t *ptl; ++ bool changed; + + i_mmap_lock_read(mapping); + vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) { +- unsigned long address, start, end; ++ unsigned long address; + + cond_resched(); + +@@ -602,13 +603,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, + continue; + + address = pgoff_address(index, vma); +- +- /* +- * Note because we provide start/end to follow_pte_pmd it will +- * call mmu_notifier_invalidate_range_start() on our behalf +- * before taking any lock. 
+- */ +- if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl)) ++ changed = false; ++ if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl)) + continue; + + if (pmdp) { +@@ -625,7 +621,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, + pmd = pmd_wrprotect(pmd); + pmd = pmd_mkclean(pmd); + set_pmd_at(vma->vm_mm, address, pmdp, pmd); +- mmu_notifier_invalidate_range(vma->vm_mm, start, end); ++ changed = true; + unlock_pmd: + spin_unlock(ptl); + #endif +@@ -640,12 +636,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, + pte = pte_wrprotect(pte); + pte = pte_mkclean(pte); + set_pte_at(vma->vm_mm, address, ptep, pte); +- mmu_notifier_invalidate_range(vma->vm_mm, start, end); ++ changed = true; + unlock_pte: + pte_unmap_unlock(ptep, ptl); + } + +- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); ++ if (changed) ++ mmu_notifier_invalidate_page(vma->vm_mm, address); + } + i_mmap_unlock_read(mapping); + } +diff --git a/mm/memory.c b/mm/memory.c +index 969c5bf31997..7834310a6b64 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -4044,8 +4044,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) + #endif /* __PAGETABLE_PMD_FOLDED */ + + static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, +- unsigned long *start, unsigned long *end, +- pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) ++ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) + { + pgd_t *pgd; + p4d_t *p4d; +@@ -4072,29 +4071,17 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, + if (!pmdpp) + goto out; + +- if (start && end) { +- *start = address & PMD_MASK; +- *end = *start + PMD_SIZE; +- mmu_notifier_invalidate_range_start(mm, *start, *end); +- } + *ptlp = pmd_lock(mm, pmd); + if (pmd_huge(*pmd)) { + *pmdpp = pmd; + return 0; + } + spin_unlock(*ptlp); +- if (start && end) +- mmu_notifier_invalidate_range_end(mm, *start, *end); + } + + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto out; + +- if (start && end) { +- *start = address & PAGE_MASK; +- *end = *start + PAGE_SIZE; +- mmu_notifier_invalidate_range_start(mm, *start, *end); +- } + ptep = pte_offset_map_lock(mm, pmd, address, ptlp); + if (!pte_present(*ptep)) + goto unlock; +@@ -4102,8 +4089,6 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, + return 0; + unlock: + pte_unmap_unlock(ptep, *ptlp); +- if (start && end) +- mmu_notifier_invalidate_range_end(mm, *start, *end); + out: + return -EINVAL; + } +@@ -4115,21 +4100,20 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address, + + /* (void) is needed to make gcc happy */ + (void) __cond_lock(*ptlp, +- !(res = __follow_pte_pmd(mm, address, NULL, NULL, +- ptepp, NULL, ptlp))); ++ !(res = __follow_pte_pmd(mm, address, ptepp, NULL, ++ ptlp))); + return res; + } + + int follow_pte_pmd(struct mm_struct *mm, unsigned long address, +- unsigned long *start, unsigned long *end, + pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) + { + int res; + + /* (void) is needed to make gcc happy */ + (void) __cond_lock(*ptlp, +- !(res = __follow_pte_pmd(mm, address, start, end, +- ptepp, pmdpp, ptlp))); ++ !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp, ++ ptlp))); + return res; + } + EXPORT_SYMBOL(follow_pte_pmd); +diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c +index 314285284e6e..54ca54562928 100644 +--- a/mm/mmu_notifier.c ++++ b/mm/mmu_notifier.c +@@ -174,6 +174,20 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long 
address, + srcu_read_unlock(&srcu, id); + } + ++void __mmu_notifier_invalidate_page(struct mm_struct *mm, ++ unsigned long address) ++{ ++ struct mmu_notifier *mn; ++ int id; ++ ++ id = srcu_read_lock(&srcu); ++ hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { ++ if (mn->ops->invalidate_page) ++ mn->ops->invalidate_page(mn, mm, address); ++ } ++ srcu_read_unlock(&srcu, id); ++} ++ + void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) + { +diff --git a/mm/rmap.c b/mm/rmap.c +index c570f82e6827..c8993c63eb25 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -887,21 +887,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, + .address = address, + .flags = PVMW_SYNC, + }; +- unsigned long start = address, end; + int *cleaned = arg; + +- /* +- * We have to assume the worse case ie pmd for invalidation. Note that +- * the page can not be free from this function. +- */ +- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); +- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); +- + while (page_vma_mapped_walk(&pvmw)) { +- unsigned long cstart, cend; + int ret = 0; +- +- cstart = address = pvmw.address; ++ address = pvmw.address; + if (pvmw.pte) { + pte_t entry; + pte_t *pte = pvmw.pte; +@@ -914,7 +904,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, + entry = pte_wrprotect(entry); + entry = pte_mkclean(entry); + set_pte_at(vma->vm_mm, address, pte, entry); +- cend = cstart + PAGE_SIZE; + ret = 1; + } else { + #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE +@@ -929,8 +918,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, + entry = pmd_wrprotect(entry); + entry = pmd_mkclean(entry); + set_pmd_at(vma->vm_mm, address, pmd, entry); +- cstart &= PMD_MASK; +- cend = cstart + PMD_SIZE; + ret = 1; + #else + /* unexpected pmd-mapped page? */ +@@ -939,13 +926,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, + } + + if (ret) { +- mmu_notifier_invalidate_range(vma->vm_mm, cstart, cend); ++ mmu_notifier_invalidate_page(vma->vm_mm, address); + (*cleaned)++; + } + } + +- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); +- + return true; + } + +@@ -1339,7 +1324,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, + pte_t pteval; + struct page *subpage; + bool ret = true; +- unsigned long start = address, end; + enum ttu_flags flags = (enum ttu_flags)arg; + + /* munlock has nothing to gain from examining un-locked vmas */ +@@ -1351,14 +1335,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, + flags & TTU_MIGRATION, page); + } + +- /* +- * We have to assume the worse case ie pmd for invalidation. Note that +- * the page can not be free in this function as call of try_to_unmap() +- * must hold a reference on the page. +- */ +- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); +- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); +- + while (page_vma_mapped_walk(&pvmw)) { + /* + * If the page is mlock()d, we cannot swap it out. 
+@@ -1469,7 +1445,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, + if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) { + WARN_ON_ONCE(1); + ret = false; +- /* We have to invalidate as we cleared the pte */ + page_vma_mapped_walk_done(&pvmw); + break; + } +@@ -1515,12 +1490,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, + discard: + page_remove_rmap(subpage, PageHuge(page)); + put_page(page); +- mmu_notifier_invalidate_range(mm, address, +- address + PAGE_SIZE); ++ mmu_notifier_invalidate_page(mm, address); + } +- +- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); +- + return ret; + } + +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index bfa9c4d34102..1d048ef969a8 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -322,6 +322,47 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) + return container_of(mn, struct kvm, mmu_notifier); + } + ++static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, ++ struct mm_struct *mm, ++ unsigned long address) ++{ ++ struct kvm *kvm = mmu_notifier_to_kvm(mn); ++ int need_tlb_flush, idx; ++ ++ /* ++ * When ->invalidate_page runs, the linux pte has been zapped ++ * already but the page is still allocated until ++ * ->invalidate_page returns. So if we increase the sequence ++ * here the kvm page fault will notice if the spte can't be ++ * established because the page is going to be freed. If ++ * instead the kvm page fault establishes the spte before ++ * ->invalidate_page runs, kvm_unmap_hva will release it ++ * before returning. ++ * ++ * The sequence increase only need to be seen at spin_unlock ++ * time, and not at spin_lock time. ++ * ++ * Increasing the sequence after the spin_unlock would be ++ * unsafe because the kvm page fault could then establish the ++ * pte after kvm_unmap_hva returned, without noticing the page ++ * is going to be freed. ++ */ ++ idx = srcu_read_lock(&kvm->srcu); ++ spin_lock(&kvm->mmu_lock); ++ ++ kvm->mmu_notifier_seq++; ++ need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; ++ /* we've to flush the tlb before the pages can be freed */ ++ if (need_tlb_flush) ++ kvm_flush_remote_tlbs(kvm); ++ ++ spin_unlock(&kvm->mmu_lock); ++ ++ kvm_arch_mmu_notifier_invalidate_page(kvm, address); ++ ++ srcu_read_unlock(&kvm->srcu, idx); ++} ++ + static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address, +@@ -469,6 +510,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, + } + + static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { ++ .invalidate_page = kvm_mmu_notifier_invalidate_page, + .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, + .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, + .clear_flush_young = kvm_mmu_notifier_clear_flush_young, +-- +2.14.2 +
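
Note on patch 0007: its substantive effect is to restore the per-page invalidate_page callback in struct mmu_notifier_ops (removed upstream by the 'mmu_notifier_fixes' series this commit reverts), along with its callers in mm/rmap.c and fs/dax.c and the driver-side implementations. For orientation only, a minimal sketch of a consumer of the restored hook, written against the pre-4.13 API exactly as re-added above — the demo_* names are hypothetical and this is not part of the patch series:

	/* Minimal mmu_notifier consumer sketch; assumes the invalidate_page
	 * callback restored by patch 0007. demo_* names are hypothetical. */
	#include <linux/mmu_notifier.h>
	#include <linux/sched.h>	/* current */
	#include <linux/printk.h>

	static void demo_invalidate_page(struct mmu_notifier *mn,
					 struct mm_struct *mm,
					 unsigned long address)
	{
		/* Called while the page is still allocated; a secondary MMU
		 * would drop its mapping of this single page here. */
		pr_debug("invalidate_page: mm=%p addr=%#lx\n", mm, address);
	}

	static const struct mmu_notifier_ops demo_mn_ops = {
		.invalidate_page	= demo_invalidate_page,
	};

	static struct mmu_notifier demo_mn = {
		.ops = &demo_mn_ops,
	};

	/* Registration against the caller's mm, e.g. from a driver's open(): */
	static int demo_register(void)
	{
		return mmu_notifier_register(&demo_mn, current->mm);
	}

As the KVM hunk's comment explains, the hook runs after the Linux pte is zapped but before the page is freed, which is why kvm_mmu_notifier_invalidate_page can bump mmu_notifier_seq and flush TLBs under mmu_lock without racing page reuse.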