backport some fixes-fixes

Ubuntu's latest tag was updated from v5.15.60 to v5.15.64. One advantage of Ubuntu trailing the upstream stable 5.15.y branch is that we can backport the follow-up fixes for fixes that came in with that commit range.

Found with the report of:

  git log --decorate v5.15..v5.15.73 | \
    ~/gitdm/stablefixes --fixed-after v5.15.64 --regressed-before v5.15.64

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2022-10-13 14:25:41 +02:00 · 2022-10-13 14:25:41 +02:00 · 3da8ec8523
commit 3da8ec8523
parent 95d22b9bec
9 changed files with 770 additions and 0 deletions
--- a/patches/kernel/0027-KVM-VMX-Heed-the-msr-argument-in-msr_write_intercept.patch
+++ b/patches/kernel/0027-KVM-VMX-Heed-the-msr-argument-in-msr_write_intercept.patch
@ -0,0 +1,41 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Wed, 10 Aug 2022 14:30:50 -0700
+Subject: [PATCH] KVM: VMX: Heed the 'msr' argument in msr_write_intercepted()
+
+[ Upstream commit 020dac4187968535f089f83f376a72beb3451311 ]
+
+Regardless of the 'msr' argument passed to the VMX version of
+msr_write_intercepted(), the function always checks to see if a
+specific MSR (IA32_SPEC_CTRL) is intercepted for write.  This behavior
+seems unintentional and unexpected.
+
+Modify the function so that it checks to see if the provided 'msr'
+index is intercepted for write.
+
+Fixes: 67f4b9969c30 ("KVM: nVMX: Handle dynamic MSR intercept toggling")
+Cc: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220810213050.2655000-1-jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kvm/vmx/vmx.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index fc0bb685283d..290f4d0aca7e 100644
+--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
+@@ -831,8 +831,7 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
+ 	if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
+ 		return true;
+ 
+-	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
+-					 MSR_IA32_SPEC_CTRL);
+	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
+ }
+ 
+ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
--- a/patches/kernel/0028-skmsg-Fix-wrong-last-sg-check-in-sk_msg_recvmsg.patch
+++ b/patches/kernel/0028-skmsg-Fix-wrong-last-sg-check-in-sk_msg_recvmsg.patch
@ -0,0 +1,63 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Liu Jian <liujian56@huawei.com>
+Date: Tue, 9 Aug 2022 17:49:15 +0800
+Subject: [PATCH] skmsg: Fix wrong last sg check in sk_msg_recvmsg()
+
+[ Upstream commit 583585e48d965338e73e1eb383768d16e0922d73 ]
+
+Fix one kernel NULL pointer dereference as below:
+
+[  224.462334] Call Trace:
+[  224.462394]  __tcp_bpf_recvmsg+0xd3/0x380
+[  224.462441]  ? sock_has_perm+0x78/0xa0
+[  224.462463]  tcp_bpf_recvmsg+0x12e/0x220
+[  224.462494]  inet_recvmsg+0x5b/0xd0
+[  224.462534]  __sys_recvfrom+0xc8/0x130
+[  224.462574]  ? syscall_trace_enter+0x1df/0x2e0
+[  224.462606]  ? __do_page_fault+0x2de/0x500
+[  224.462635]  __x64_sys_recvfrom+0x24/0x30
+[  224.462660]  do_syscall_64+0x5d/0x1d0
+[  224.462709]  entry_SYSCALL_64_after_hwframe+0x65/0xca
+
+In commit 9974d37ea75f ("skmsg: Fix invalid last sg check in
+sk_msg_recvmsg()"), we change last sg check to sg_is_last(),
+but in sockmap redirection case (without stream_parser/stream_verdict/
+skb_verdict), we did not mark the end of the scatterlist. Check the
+sk_msg_alloc, sk_msg_page_add, and bpf_msg_push_data functions, they all
+do not mark the end of sg. They are expected to use sg.end for end
+judgment. So the judgment of '(i != msg_rx->sg.end)' is added back here.
+
+Fixes: 9974d37ea75f ("skmsg: Fix invalid last sg check in sk_msg_recvmsg()")
+Signed-off-by: Liu Jian <liujian56@huawei.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20220809094915.150391-1-liujian56@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ net/core/skmsg.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index f50f8d95b628..23d65fe160c3 100644
+--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
+@@ -462,7 +462,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ 
+ 			if (copied == len)
+ 				break;
+-		} while (!sg_is_last(sge));
+		} while ((i != msg_rx->sg.end) && !sg_is_last(sge));
+ 
+ 		if (unlikely(peek)) {
+ 			msg_rx = sk_psock_next_msg(psock, msg_rx);
+@@ -472,7 +472,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ 		}
+ 
+ 		msg_rx->sg.start = i;
+-		if (!sge->length && sg_is_last(sge)) {
+		if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) {
+ 			msg_rx = sk_psock_dequeue_msg(psock);
+ 			kfree_sk_msg(msg_rx);
+ 		}
--- a/patches/kernel/0029-drm-i915-gt-Skip-TLB-invalidations-once-wedged.patch
+++ b/patches/kernel/0029-drm-i915-gt-Skip-TLB-invalidations-once-wedged.patch
@ -0,0 +1,51 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris.p.wilson@intel.com>
+Date: Wed, 27 Jul 2022 14:29:54 +0200
+Subject: [PATCH] drm/i915/gt: Skip TLB invalidations once wedged
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit e5a95c83ed1492c0f442b448b20c90c8faaf702b ]
+
+Skip all further TLB invalidations once the device is wedged and
+had been reset, as, on such cases, it can no longer process instructions
+on the GPU and the user no longer has access to the TLB's in each engine.
+
+So, an attempt to do a TLB cache invalidation will produce a timeout.
+
+That helps to reduce the performance regression introduced by TLB
+invalidate logic.
+
+Cc: stable@vger.kernel.org
+Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
+Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
+Cc: Fei Yang <fei.yang@intel.com>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
+Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/5aa86564b9ec5fe7fe605c1dd7de76855401ed73.1658924372.git.mchehab@kernel.org
+(cherry picked from commit be0366f168033374a93e4c43fdaa1a90ab905184)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/gpu/drm/i915/gt/intel_gt.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
+index 3a76000d15bf..ed8ad3b26395 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
+@@ -949,6 +949,9 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+ 	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ 		return;
+ 
+	if (intel_gt_is_wedged(gt))
+		return;
+
+ 	if (GRAPHICS_VER(i915) == 12) {
+ 		regs = gen12_regs;
+ 		num = ARRAY_SIZE(gen12_regs);
--- a/patches/kernel/0030-net-phy-Don-t-WARN-for-PHY_UP-state-in-mdio_bus_phy_.patch
+++ b/patches/kernel/0030-net-phy-Don-t-WARN-for-PHY_UP-state-in-mdio_bus_phy_.patch
@ -0,0 +1,60 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Fri, 23 Sep 2022 06:09:52 +0200
+Subject: [PATCH] net: phy: Don't WARN for PHY_UP state in
+ mdio_bus_phy_resume()
+
+[ Upstream commit ea64cdfad124922c931633e39287c5a31a9b14a1 ]
+
+Commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume()
+state") introduced a WARN() on resume from system sleep if a PHY is not
+in PHY_HALTED state.
+
+Commit 6dbe852c379f ("net: phy: Don't WARN for PHY_READY state in
+mdio_bus_phy_resume()") added an exemption for PHY_READY state from
+the WARN().
+
+It turns out PHY_UP state needs to be exempted as well because the
+following may happen on suspend:
+
+  mdio_bus_phy_suspend()
+    phy_stop_machine()
+      phydev->state = PHY_UP  #  if (phydev->state >= PHY_UP)
+
+Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")
+Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Link: https://lore.kernel.org/netdev/2b1a1588-505e-dff3-301d-bfc1fb14d685@samsung.com/
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Cc: Xiaolei Wang <xiaolei.wang@windriver.com>
+Link: https://lore.kernel.org/r/8128fdb51eeebc9efbf3776a4097363a1317aaf1.1663905575.git.lukas@wunner.de
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/net/phy/phy_device.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index b616f55ea222..c5b92ffaffb9 100644
+--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
+@@ -315,11 +315,13 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
+ 
+ 	phydev->suspended_by_mdio_bus = 0;
+ 
+-	/* If we manged to get here with the PHY state machine in a state neither
+-	 * PHY_HALTED nor PHY_READY this is an indication that something went wrong
+-	 * and we should most likely be using MAC managed PM and we are not.
+	/* If we managed to get here with the PHY state machine in a state
+	 * neither PHY_HALTED, PHY_READY nor PHY_UP, this is an indication
+	 * that something went wrong and we should most likely be using
+	 * MAC managed PM, but we are not.
+ 	 */
+-	WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY);
+	WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY &&
+		phydev->state != PHY_UP);
+ 
+ 	ret = phy_init_hw(phydev);
+ 	if (ret < 0)
--- a/patches/kernel/0031-drm-amdgpu-Don-t-enable-LTR-if-not-supported.patch
+++ b/patches/kernel/0031-drm-amdgpu-Don-t-enable-LTR-if-not-supported.patch
@ -0,0 +1,169 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Lijo Lazar <lijo.lazar@amd.com>
+Date: Thu, 8 Sep 2022 08:28:57 +0530
+Subject: [PATCH] drm/amdgpu: Don't enable LTR if not supported
+
+commit 6c20490663553cd7e07d8de8af482012329ab9d6 upstream.
+
+As per PCIE Base Spec r4.0 Section 6.18
+'Software must not enable LTR in an Endpoint unless the Root Complex
+and all intermediate Switches indicate support for LTR.'
+
+This fixes the Unsupported Request error reported through AER during
+ASPM enablement.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216455
+
+The error was unnoticed before and got visible because of the commit
+referenced below. This doesn't fix anything in the commit below, rather
+fixes the issue in amdgpu exposed by the commit. The reference is only
+to associate this commit with below one so that both go together.
+
+Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()")
+
+Reported-by: Gustaw Smolarczyk <wielkiegie@gmail.com>
+Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 9 ++++++++-
+ drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 9 ++++++++-
+ drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 9 ++++++++-
+ 3 files changed, 24 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+index b184b656b9b6..6f21154d4891 100644
+--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+@@ -366,6 +366,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
+ 		WREG32_PCIE(smnPCIE_LC_CNTL, data);
+ }
+ 
+#ifdef CONFIG_PCIEASPM
+ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
+ {
+ 	uint32_t def, data;
+@@ -387,9 +388,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
+ 	if (def != data)
+ 		WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+ }
+#endif
+ 
+ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
+ {
+#ifdef CONFIG_PCIEASPM
+ 	uint32_t def, data;
+ 
+ 	def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+@@ -445,7 +448,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
+ 	if (def != data)
+ 		WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+ 
+-	nbio_v2_3_program_ltr(adev);
+	/* Don't bother about LTR if LTR is not enabled
+	 * in the path */
+	if (adev->pdev->ltr_path)
+		nbio_v2_3_program_ltr(adev);
+ 
+ 	def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3);
+ 	data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+@@ -469,6 +475,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
+ 	data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ 	if (def != data)
+ 		WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+ }
+ 
+ static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
+diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+index 0d2d629e2d6a..be3f6c52c3ff 100644
+--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+@@ -278,6 +278,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
+ 		WREG32_PCIE(smnPCIE_CI_CNTL, data);
+ }
+ 
+#ifdef CONFIG_PCIEASPM
+ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
+ {
+ 	uint32_t def, data;
+@@ -299,9 +300,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
+ 	if (def != data)
+ 		WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+ }
+#endif
+ 
+ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
+ {
+#ifdef CONFIG_PCIEASPM
+ 	uint32_t def, data;
+ 
+ 	def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+@@ -357,7 +360,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
+ 	if (def != data)
+ 		WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+ 
+-	nbio_v6_1_program_ltr(adev);
+	/* Don't bother about LTR if LTR is not enabled
+	 * in the path */
+	if (adev->pdev->ltr_path)
+		nbio_v6_1_program_ltr(adev);
+ 
+ 	def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ 	data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+@@ -381,6 +387,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
+ 	data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ 	if (def != data)
+ 		WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+ }
+ 
+ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
+diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+index f50045cebd44..74cd7543729b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+@@ -630,6 +630,7 @@ const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs = {
+ 	.ras_fini = amdgpu_nbio_ras_fini,
+ };
+ 
+#ifdef CONFIG_PCIEASPM
+ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
+ {
+ 	uint32_t def, data;
+@@ -651,9 +652,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
+ 	if (def != data)
+ 		WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+ }
+#endif
+ 
+ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
+ {
+#ifdef CONFIG_PCIEASPM
+ 	uint32_t def, data;
+ 
+ 	def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+@@ -709,7 +712,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
+ 	if (def != data)
+ 		WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+ 
+-	nbio_v7_4_program_ltr(adev);
+	/* Don't bother about LTR if LTR is not enabled
+	 * in the path */
+	if (adev->pdev->ltr_path)
+		nbio_v7_4_program_ltr(adev);
+ 
+ 	def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ 	data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+@@ -733,6 +739,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
+ 	data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ 	if (def != data)
+ 		WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+ }
+ 
+ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
--- a/patches/kernel/0032-drm-amdgpu-move-nbio-ih_doorbell_range-into-ih-code-.patch
+++ b/patches/kernel/0032-drm-amdgpu-move-nbio-ih_doorbell_range-into-ih-code-.patch
@ -0,0 +1,87 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 9 Sep 2022 11:47:20 -0400
+Subject: [PATCH] drm/amdgpu: move nbio ih_doorbell_range() into ih code for
+ vega
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit dc1d85cb790f2091eea074cee24a704b2d6c4a06 upstream.
+
+This mirrors what we do for other asics and this way we are
+sure the ih doorbell range is properly initialized.
+
+There is a comment about the way doorbells on gfx9 work that
+requires that they are initialized for other IPs before GFX
+is initialized.  In this case IH is initialized before GFX,
+so there should be no issue.
+
+This is a prerequisite for fixing the Unsupported Request error
+reported through AER during driver load.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373
+
+The error was unnoticed before and got visible because of the commit
+referenced below. This doesn't fix anything in the commit below, rather
+fixes the issue in amdgpu exposed by the commit. The reference is only
+to associate this commit with below one so that both go together.
+
+Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()")
+
+Acked-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/gpu/drm/amd/amdgpu/soc15.c     | 3 ---
+ drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 4 ++++
+ drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 4 ++++
+ 3 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
+index bdb47ae96ce6..723b088094f2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
+@@ -1429,9 +1429,6 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
+ 				ring->use_doorbell, ring->doorbell_index,
+ 				adev->doorbell_index.sdma_doorbell_range);
+ 		}
+-
+-		adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+-						adev->irq.ih.doorbell_index);
+ 	}
+ }
+ 
+diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+index a9ca6988009e..73728fa85997 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+@@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
+ 		}
+ 	}
+ 
+	if (!amdgpu_sriov_vf(adev))
+		adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+						    adev->irq.ih.doorbell_index);
+
+ 	pci_set_master(adev->pdev);
+ 
+ 	/* enable interrupts */
+diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+index f51dfc38ac65..ac34af4cb178 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+@@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
+ 		}
+ 	}
+ 
+	if (!amdgpu_sriov_vf(adev))
+		adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+						    adev->irq.ih.doorbell_index);
+
+ 	pci_set_master(adev->pdev);
+ 
+ 	/* enable interrupts */
--- a/patches/kernel/0033-drm-amdgpu-move-nbio-sdma_doorbell_range-into-sdma-c.patch
+++ b/patches/kernel/0033-drm-amdgpu-move-nbio-sdma_doorbell_range-into-sdma-c.patch
@ -0,0 +1,100 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 9 Sep 2022 11:53:27 -0400
+Subject: [PATCH] drm/amdgpu: move nbio sdma_doorbell_range() into sdma code
+ for vega
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit e3163bc8ffdfdb405e10530b140135b2ee487f89 upstream.
+
+This mirrors what we do for other asics and this way we are
+sure the sdma doorbell range is properly initialized.
+
+There is a comment about the way doorbells on gfx9 work that
+requires that they are initialized for other IPs before GFX
+is initialized.  However, the statement says that it applies to
+multimedia as well, but the VCN code currently initializes
+doorbells after GFX and there are no known issues there.  In my
+testing at least I don't see any problems on SDMA.
+
+This is a prerequisite for fixing the Unsupported Request error
+reported through AER during driver load.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373
+
+The error was unnoticed before and got visible because of the commit
+referenced below. This doesn't fix anything in the commit below, rather
+fixes the issue in amdgpu exposed by the commit. The reference is only
+to associate this commit with below one so that both go together.
+
+Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()")
+
+Acked-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  5 +++++
+ drivers/gpu/drm/amd/amdgpu/soc15.c     | 22 ----------------------
+ 2 files changed, 5 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+index 9014f71d52dd..8b20326c4c05 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+@@ -1507,6 +1507,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
+ 		WREG32_SDMA(i, mmSDMA0_CNTL, temp);
+ 
+ 		if (!amdgpu_sriov_vf(adev)) {
+			ring = &adev->sdma.instance[i].ring;
+			adev->nbio.funcs->sdma_doorbell_range(adev, i,
+				ring->use_doorbell, ring->doorbell_index,
+				adev->doorbell_index.sdma_doorbell_range);
+
+ 			/* unhalt engine */
+ 			temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
+ 			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
+index 723b088094f2..7d5ff50435e5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
+@@ -1416,22 +1416,6 @@ static int soc15_common_sw_fini(void *handle)
+ 	return 0;
+ }
+ 
+-static void soc15_doorbell_range_init(struct amdgpu_device *adev)
+-{
+-	int i;
+-	struct amdgpu_ring *ring;
+-
+-	/* sdma/ih doorbell range are programed by hypervisor */
+-	if (!amdgpu_sriov_vf(adev)) {
+-		for (i = 0; i < adev->sdma.num_instances; i++) {
+-			ring = &adev->sdma.instance[i].ring;
+-			adev->nbio.funcs->sdma_doorbell_range(adev, i,
+-				ring->use_doorbell, ring->doorbell_index,
+-				adev->doorbell_index.sdma_doorbell_range);
+-		}
+-	}
+-}
+-
+ static int soc15_common_hw_init(void *handle)
+ {
+ 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+@@ -1451,12 +1435,6 @@ static int soc15_common_hw_init(void *handle)
+ 
+ 	/* enable the doorbell aperture */
+ 	soc15_enable_doorbell_aperture(adev, true);
+-	/* HW doorbell routing policy: doorbell writing not
+-	 * in SDMA/IH/MM/ACV range will be routed to CP. So
+-	 * we need to init SDMA/IH/MM/ACV doorbell range prior
+-	 * to CP ip block init and ring test.
+-	 */
+-	soc15_doorbell_range_init(adev);
+ 
+ 	return 0;
+ }
--- a/patches/kernel/0034-drm-amdgpu-Separate-vf2pf-work-item-init-from-virt-d.patch
+++ b/patches/kernel/0034-drm-amdgpu-Separate-vf2pf-work-item-init-from-virt-d.patch
@ -0,0 +1,130 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Victor Skvortsov <victor.skvortsov@amd.com>
+Date: Thu, 16 Dec 2021 17:01:45 +0000
+Subject: [PATCH] drm/amdgpu: Separate vf2pf work item init from virt data
+ exchange
+
+[ Upstream commit 892deb48269c65376f3eeb5b4c032ff2c2979bd7 ]
+
+We want to be able to call virt data exchange conditionally
+after gmc sw init to reserve bad pages as early as possible.
+Since this is a conditional call, we will need
+to call it again unconditionally later in the init sequence.
+
+Refactor the data exchange function so it can be
+called multiple times without re-initializing the work item.
+
+v2: Cleaned up the code. Kept the original call to init_exchange_data()
+inside early init to initialize the work item, afterwards call
+exchange_data() when needed.
+
+Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>
+Reviewed By: Shaoyun.liu <Shaoyun.liu@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 36 ++++++++++++++--------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  1 +
+ 3 files changed, 30 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index d1af709cc7dc..f443b4630f9d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -2390,6 +2390,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
+ 
+ 		/* need to do gmc hw init early so we can allocate gpu mem */
+ 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+			/* Try to reserve bad pages early */
+			if (amdgpu_sriov_vf(adev))
+				amdgpu_virt_exchange_data(adev);
+
+ 			r = amdgpu_device_vram_scratch_init(adev);
+ 			if (r) {
+ 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
+@@ -2421,7 +2425,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
+ 	}
+ 
+ 	if (amdgpu_sriov_vf(adev))
+-		amdgpu_virt_init_data_exchange(adev);
+		amdgpu_virt_exchange_data(adev);
+ 
+ 	r = amdgpu_ib_pool_init(adev);
+ 	if (r) {
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+index 16787c675f35..cce03aad5f0e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+@@ -614,17 +614,35 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
+ 
+ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+ {
+-	uint64_t bp_block_offset = 0;
+-	uint32_t bp_block_size = 0;
+-	struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+-
+ 	adev->virt.fw_reserve.p_pf2vf = NULL;
+ 	adev->virt.fw_reserve.p_vf2pf = NULL;
+ 	adev->virt.vf2pf_update_interval_ms = 0;
+ 
+-	if (adev->mman.fw_vram_usage_va != NULL) {
+	if (adev->bios != NULL) {
+ 		adev->virt.vf2pf_update_interval_ms = 2000;
+ 
+		adev->virt.fw_reserve.p_pf2vf =
+			(struct amd_sriov_msg_pf2vf_info_header *)
+			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+
+		amdgpu_virt_read_pf2vf_data(adev);
+	}
+
+	if (adev->virt.vf2pf_update_interval_ms != 0) {
+		INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+		schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+	}
+}
+
+
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
+{
+	uint64_t bp_block_offset = 0;
+	uint32_t bp_block_size = 0;
+	struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+
+	if (adev->mman.fw_vram_usage_va != NULL) {
+
+ 		adev->virt.fw_reserve.p_pf2vf =
+ 			(struct amd_sriov_msg_pf2vf_info_header *)
+ 			(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+@@ -655,16 +673,10 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+ 			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ 
+ 		amdgpu_virt_read_pf2vf_data(adev);
+-
+-		return;
+-	}
+-
+-	if (adev->virt.vf2pf_update_interval_ms != 0) {
+-		INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+-		schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
+ 	}
+ }
+ 
+
+ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+ {
+ 	uint32_t reg;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+index 8d4c20bb71c5..9adfb8d63280 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+@@ -308,6 +308,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
+ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
+ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
+ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
+ void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+ 
--- a/patches/kernel/0035-drm-amdgpu-make-sure-to-init-common-IP-before-gmc.patch
+++ b/patches/kernel/0035-drm-amdgpu-make-sure-to-init-common-IP-before-gmc.patch
@ -0,0 +1,69 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Tue, 30 Aug 2022 10:59:49 -0400
+Subject: [PATCH] drm/amdgpu: make sure to init common IP before gmc
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit a8671493d2074950553da3cf07d1be43185ef6c6 ]
+
+Move common IP init before GMC init so that HDP gets
+remapped before GMC init which uses it.
+
+This fixes the Unsupported Request error reported through
+AER during driver load. The error happens as a write happens
+to the remap offset before real remapping is done.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373
+
+The error was unnoticed before and got visible because of the commit
+referenced below. This doesn't fix anything in the commit below, rather
+fixes the issue in amdgpu exposed by the commit. The reference is only
+to associate this commit with below one so that both go together.
+
+Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()")
+
+Acked-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index f443b4630f9d..7450773821f4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -2388,8 +2388,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
+ 		}
+ 		adev->ip_blocks[i].status.sw = true;
+ 
+-		/* need to do gmc hw init early so we can allocate gpu mem */
+-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+			/* need to do common hw init early so everything is set up for gmc */
+			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+			if (r) {
+				DRM_ERROR("hw_init %d failed %d\n", i, r);
+				goto init_failed;
+			}
+			adev->ip_blocks[i].status.hw = true;
+		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+			/* need to do gmc hw init early so we can allocate gpu mem */
+ 			/* Try to reserve bad pages early */
+ 			if (amdgpu_sriov_vf(adev))
+ 				amdgpu_virt_exchange_data(adev);
+@@ -3037,8 +3045,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
+ 	int i, r;
+ 
+ 	static enum amd_ip_block_type ip_order[] = {
+-		AMD_IP_BLOCK_TYPE_GMC,
+ 		AMD_IP_BLOCK_TYPE_COMMON,
+		AMD_IP_BLOCK_TYPE_GMC,
+ 		AMD_IP_BLOCK_TYPE_PSP,
+ 		AMD_IP_BLOCK_TYPE_IH,
+ 	};