rebase patches

and drop those applied upstream

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Fabian Grünbichler 2018-03-21 11:26:28 +01:00 committed by Thomas Lamprecht
parent 8cef3abbbf
commit eac000ce29
24 changed files with 5 additions and 884 deletions


@@ -12,7 +12,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 4d81f6ded88e..bfa9c4d34102 100644
+index 3b3e54742263..d0085c9d6297 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -77,7 +77,7 @@ module_param(halt_poll_ns, uint, 0644);


@@ -1,90 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 30 Nov 2017 19:05:45 +0100
Subject: [PATCH] KVM: x86: fix APIC page invalidation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implementation of the unpinned APIC page didn't update the VMCS address
cache when invalidation was done through range mmu notifiers.

This became a problem when the page notifier was removed.

Re-introduce the arch-specific helper and call it from ...range_start.

Fixes: 38b9917350cb ("kvm: vmx: Implement set_apic_access_page_addr")
Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2")
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/x86.c              | 14 ++++++++++++++
 virt/kvm/kvm_main.c             |  8 ++++++++
 3 files changed, 25 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 78ec3cda9429..1953c0a5b972 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1439,4 +1439,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#endif
}
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f896c441fc2c..eae4aecf3cfe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6711,6 +6711,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
kvm_x86_ops->tlb_flush(vcpu);
}
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+{
+ unsigned long apic_address;
+
+ /*
+ * The physical address of apic access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (start <= apic_address && apic_address < end)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
struct page *page = NULL;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bfa9c4d34102..d0085c9d6297 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -136,6 +136,11 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+{
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
if (pfn_valid(pfn))
@@ -361,6 +366,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
+
+ kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+
srcu_read_unlock(&kvm->srcu, idx);
}
--
2.14.2
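
The generic/arch split this patch reinstates rests on the kernel's weak-
linkage idiom: kvm_main.c provides an empty __weak default, and the x86
definition of the same symbol overrides it at link time. A minimal
stand-alone sketch of that idiom follows; the names are hypothetical and
the GCC attribute is spelled out instead of the kernel's __weak macro.

    #include <stdio.h>

    /* Weak default: used only when no strong definition of the same
     * symbol is linked in. A strong override would be the identical
     * signature, without the attribute, in another translation unit. */
    __attribute__((weak)) void arch_invalidate_hook(unsigned long start,
                                                    unsigned long end)
    {
        /* no-op on architectures without an address cache to refresh */
    }

    void notifier_range_start(unsigned long start, unsigned long end)
    {
        /* generic invalidation work would happen here ... */
        arch_invalidate_hook(start, end);   /* always safe to call */
    }

    int main(void)
    {
        notifier_range_start(0x1000, 0x2000);
        printf("range invalidated\n");
        return 0;
    }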


@@ -1,72 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wei Xu <wexu@redhat.com>
Date: Fri, 1 Dec 2017 05:10:36 -0500
Subject: [PATCH] vhost: fix skb leak in handle_rx()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Matthew found a roughly 40% tcp throughput regression with commit
c67df11f ("vhost_net: try batch dequing from skb array") as discussed
in the following thread:
https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html

Eventually we figured out that it was an skb leak in handle_rx()
when sending packets to the VM. This usually happens when the guest
cannot drain the vq as fast as vhost fills it; the resulting traffic
jam leaks skbs because there is no headcount available on the vq from
the vhost side.

This can be avoided by making sure we have enough headcount before
actually consuming an skb from the batched rx array, which is done
simply by moving the zero-headcount check a bit earlier.

Signed-off-by: Wei Xu <wexu@redhat.com>
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 drivers/vhost/net.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1c75572f5a3f..010253847022 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -781,16 +781,6 @@ static void handle_rx(struct vhost_net *net)
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
goto out;
- if (nvq->rx_array)
- msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
- /* On overrun, truncate and discard */
- if (unlikely(headcount > UIO_MAXIOV)) {
- iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
- err = sock->ops->recvmsg(sock, &msg,
- 1, MSG_DONTWAIT | MSG_TRUNC);
- pr_debug("Discarded rx packet: len %zd\n", sock_len);
- continue;
- }
/* OK, now we need to know about added descriptors. */
if (!headcount) {
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
@@ -803,6 +793,16 @@ static void handle_rx(struct vhost_net *net)
* they refilled. */
goto out;
}
+ if (nvq->rx_array)
+ msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
+ /* On overrun, truncate and discard */
+ if (unlikely(headcount > UIO_MAXIOV)) {
+ iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+ err = sock->ops->recvmsg(sock, &msg,
+ 1, MSG_DONTWAIT | MSG_TRUNC);
+ pr_debug("Discarded rx packet: len %zd\n", sock_len);
+ continue;
+ }
/* We don't need to be notified again. */
iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
fixup = msg.msg_iter;
--
2.14.2
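
Reduced to its core, the fix above is an ordering rule: reserve guest
buffers first, and only take a packet out of the batched array once
delivery can definitely proceed. A compilable toy model of that rule,
with hypothetical stand-ins for the vhost structures:

    #include <stdio.h>

    struct batch { int n; int pkts[8]; };    /* stand-in for the rx array */

    static int guest_bufs_available(void) { return 2; }  /* toy headcount */
    static int batch_consume(struct batch *b) { return b->pkts[--b->n]; }

    static void handle_rx(struct batch *b)
    {
        int delivered = 0;

        while (b->n > 0) {
            /* 1. Check headcount before touching the batch; bailing out
             *    here leaks nothing, since nothing was dequeued yet. */
            if (delivered >= guest_bufs_available())
                break;

            /* 2. Only now take ownership of a batched packet. */
            printf("delivered packet %d\n", batch_consume(b));
            delivered++;
        }
    }

    int main(void)
    {
        struct batch b = { .n = 3, .pkts = { 10, 11, 12 } };

        handle_rx(&b);  /* delivers two packets, leaves one queued, leaks none */
        return 0;
    }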


@@ -1,86 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wei Xu <wexu@redhat.com>
Date: Fri, 1 Dec 2017 05:10:37 -0500
Subject: [PATCH] tun: free skb in early errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit ac77cfd4258f ("tun: support receiving skb through
msg_control"), tun_recvmsg() accepts an skb via msg_control; if such an
skb is passed in, it must be freed no matter how far it gets along,
otherwise it is leaked.

This patch fixes several missed cases.

Signed-off-by: Wei Xu <wexu@redhat.com>
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 drivers/net/tun.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d1cb1ff83251..d58ae8ad0a4e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1519,8 +1519,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
tun_debug(KERN_INFO, tun, "tun_do_read\n");
- if (!iov_iter_count(to))
+ if (!iov_iter_count(to)) {
+ if (skb)
+ kfree_skb(skb);
return 0;
+ }
if (!skb) {
/* Read frames from ring */
@@ -1636,22 +1639,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
{
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
struct tun_struct *tun = __tun_get(tfile);
+ struct sk_buff *skb = m->msg_control;
int ret;
- if (!tun)
- return -EBADFD;
+ if (!tun) {
+ ret = -EBADFD;
+ goto out_free_skb;
+ }
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
ret = -EINVAL;
- goto out;
+ goto out_put_tun;
}
if (flags & MSG_ERRQUEUE) {
ret = sock_recv_errqueue(sock->sk, m, total_len,
SOL_PACKET, TUN_TX_TIMESTAMP);
goto out;
}
- ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
- m->msg_control);
+ ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
if (ret > (ssize_t)total_len) {
m->msg_flags |= MSG_TRUNC;
ret = flags & MSG_TRUNC ? ret : total_len;
@@ -1659,6 +1664,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
out:
tun_put(tun);
return ret;
+
+out_put_tun:
+ tun_put(tun);
+out_free_skb:
+ if (skb)
+ kfree_skb(skb);
+ return ret;
}
static int tun_peek_len(struct socket *sock)
--
2.14.2
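
The restructured exit paths above follow the standard kernel goto-unwind
ladder: one label per piece of state to release, ordered so that later
failures fall through the earlier cleanups. A compilable toy with the
same shape, where "dev" plays the tun reference and "pkt" the
caller-provided skb (error values are stand-ins):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int toy_recvmsg(const char *dev, char *pkt, int bad_flags)
    {
        int ret;

        if (!dev) {
            ret = -1;               /* -EBADFD stand-in */
            goto out_free_pkt;      /* no reference taken: free pkt only */
        }
        if (bad_flags) {
            ret = -2;               /* -EINVAL stand-in */
            goto out_put_dev;       /* drop the reference, then the pkt */
        }

        printf("read via %s: %s\n", dev, pkt);
        free(pkt);                  /* consumed on the success path too */
        return 0;

    out_put_dev:
        printf("put reference on %s\n", dev);
    out_free_pkt:
        free(pkt);                  /* free(NULL) is a harmless no-op */
        return ret;
    }

    int main(void)
    {
        toy_recvmsg("tun0", strdup("ok"), 0);
        toy_recvmsg(NULL, strdup("no dev"), 0);      /* freed, not leaked */
        toy_recvmsg("tun0", strdup("bad flags"), 1); /* both cleanups run */
        return 0;
    }

The tap fix in the next file applies the same ownership rule, just
without a reference-counted device to unwind.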


@@ -1,58 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wei Xu <wexu@redhat.com>
Date: Fri, 1 Dec 2017 05:10:38 -0500
Subject: [PATCH] tap: free skb if flags error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 3b4ba04acca8 ("tap: support receiving skb from
msg_control"), tap_recvmsg() accepts an skb via msg_control; if such an
skb is passed in, it must be freed within the function, otherwise it is
leaked.

Signed-off-by: Wei Xu <wexu@redhat.com>
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 drivers/net/tap.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 7a2f6bebfd15..96e5e5b2ae39 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q,
DEFINE_WAIT(wait);
ssize_t ret = 0;
- if (!iov_iter_count(to))
+ if (!iov_iter_count(to)) {
+ if (skb)
+ kfree_skb(skb);
return 0;
+ }
if (skb)
goto put;
@@ -1157,11 +1160,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m,
size_t total_len, int flags)
{
struct tap_queue *q = container_of(sock, struct tap_queue, sock);
+ struct sk_buff *skb = m->msg_control;
int ret;
- if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
+ if (skb)
+ kfree_skb(skb);
return -EINVAL;
- ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
- m->msg_control);
+ }
+ ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb);
if (ret > total_len) {
m->msg_flags |= MSG_TRUNC;
ret = flags & MSG_TRUNC ? ret : total_len;
--
2.14.2


@@ -22,7 +22,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
 1 file changed, 10 insertions(+), 2 deletions(-)
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index 146caacd8fdd..80732f87cac0 100644
+index 097f3daae037..b078b56234bb 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -883,8 +883,16 @@ static inline short vmcs_field_to_offset(unsigned long field)


@@ -21,10 +21,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
 1 file changed, 1 insertion(+)
 diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
-index 7a3b201d51df..fb0264ef83a3 100644
+index 5c3e707ff3fc..59af590b660c 100644
 --- a/drivers/edac/sb_edac.c
 +++ b/drivers/edac/sb_edac.c
-@@ -2467,6 +2467,7 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
+@@ -2454,6 +2454,7 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA:
 pvt->pci_ta = pdev;


@@ -1,299 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 6 Nov 2017 13:31:12 +0100
Subject: [PATCH] kvm: vmx: Reinstate support for CPUs without virtual NMI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 8a1b43922d0d1279e7936ba85c4c2a870403c95f upstream.

This is more or less a revert of commit 2c82878b0cb3 ("KVM: VMX: require
virtual NMI support", 2017-03-27); it turns out that Core 2 Duo machines
only had virtual NMIs in some SKUs.

The revert is not trivial because in the meanwhile there have been several
fixes to nested NMI injection. Therefore, the entire vNMI state is moved
to struct loaded_vmcs.

Another change compared to before the patch is a simplification here:

        if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
            !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
                        get_vmcs12(vcpu))))) {

The final condition here is always true (because nested_cpu_has_virtual_nmis
is always false) and is removed.

Fixes: 2c82878b0cb38fd516fd612c67852a6bbf282003
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1490803
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/kvm/vmx.c | 150 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 106 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5edf05ce45de..146caacd8fdd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -204,6 +204,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
+ /* Support for vnmi-less CPUs */
+ int soft_vnmi_blocked;
+ ktime_t entry_time;
+ s64 vnmi_blocked_time;
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -1290,6 +1294,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
+static inline bool cpu_has_virtual_nmis(void)
+{
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1341,11 +1350,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit);
}
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
-{
- return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
-}
-
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control &
@@ -3687,9 +3691,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
- PIN_BASED_VIRTUAL_NMIS;
- opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+ PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -5549,7 +5553,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ if (!cpu_has_virtual_nmis() ||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@@ -5589,6 +5594,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (!cpu_has_virtual_nmis()) {
+ /*
+ * Tracking the NMI-blocked state in software is built upon
+ * finding the next open IRQ window. This, in turn, depends on
+ * well-behaving guests: They have to keep IRQs disabled at
+ * least as long as the NMI handler runs. Otherwise we may
+ * cause NMI nesting, maybe breaking the guest. But as this is
+ * highly unlikely, we can live with the residual risk.
+ */
+ vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+
++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false;
@@ -5607,6 +5625,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked;
+ if (!cpu_has_virtual_nmis())
+ return vmx->loaded_vmcs->soft_vnmi_blocked;
if (vmx->loaded_vmcs->nmi_known_unmasked)
return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5618,13 +5638,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
- if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ if (!cpu_has_virtual_nmis()) {
+ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+ } else {
+ vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ }
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5632,6 +5659,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
+ if (!cpu_has_virtual_nmis() &&
+ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+ return 0;
+
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -6360,6 +6391,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@@ -6834,7 +6866,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
}
/* Create a new VMCS */
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
@@ -7851,6 +7883,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8568,6 +8601,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
+ if (unlikely(!cpu_has_virtual_nmis() &&
+ vmx->loaded_vmcs->soft_vnmi_blocked)) {
+ if (vmx_interrupt_allowed(vcpu)) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+ vcpu->arch.nmi_pending) {
+ /*
+ * This CPU don't support us in finding the end of an
+ * NMI-blocked window if the guest runs with IRQs
+ * disabled. So we pull the trigger after 1 s of
+ * futile waiting, but inform the user about this.
+ */
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+ "state on VCPU %d after 1 s timeout\n",
+ __func__, vcpu->vcpu_id);
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ }
+ }
+
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -8850,33 +8902,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
- if (vmx->loaded_vmcs->nmi_known_unmasked)
- return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- /*
- * SDM 3: 27.7.1.2 (September 2008)
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
- * a guest IRET fault.
- * SDM 3: 23.2.2 (September 2008)
- * Bit 12 is undefined in any of the following cases:
- * If the VM exit sets the valid bit in the IDT-vectoring
- * information field.
- * If the VM exit is due to a double fault.
- */
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmx->loaded_vmcs->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- & GUEST_INTR_STATE_NMI);
+ if (cpu_has_virtual_nmis()) {
+ if (vmx->loaded_vmcs->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ /*
+ * SDM 3: 27.7.1.2 (September 2008)
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ * a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
+ */
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmx->loaded_vmcs->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
+ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->vnmi_blocked_time +=
+ ktime_to_ns(ktime_sub(ktime_get(),
+ vmx->loaded_vmcs->entry_time));
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -8993,6 +9050,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4;
+ /* Record the guest's net vcpu time for enforced NMI injections. */
+ if (unlikely(!cpu_has_virtual_nmis() &&
+ vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->entry_time = ktime_get();
+
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
--
2.14.2
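
Stripped of the VMX details, the soft-vNMI fallback is plain
bookkeeping: a blocked flag plus an accumulated guest-time counter that
is force-cleared after one second of futile waiting. A stand-alone
sketch of that accounting, mirroring the vnmi_blocked_time handling and
the 1 s breakout above (all names hypothetical):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ONE_SEC_NS 1000000000LL

    struct soft_vnmi {
        bool    blocked;        /* software stand-in for the hardware bit */
        int64_t blocked_ns;     /* guest time spent in the blocked state */
    };

    /* Called per emulated VM exit with the nanoseconds spent in the guest. */
    static void vnmi_account_exit(struct soft_vnmi *s, int64_t guest_ns,
                                  bool irq_window_open)
    {
        if (!s->blocked)
            return;

        if (irq_window_open) {
            /* An open IRQ window is the normal end of NMI blocking. */
            s->blocked = false;
            s->blocked_ns = 0;
        } else if ((s->blocked_ns += guest_ns) > ONE_SEC_NS) {
            /* The guest kept IRQs off too long: pull the trigger anyway. */
            printf("breaking out of NMI-blocked state after 1 s\n");
            s->blocked = false;
        }
    }

    int main(void)
    {
        struct soft_vnmi s = { .blocked = true };

        for (int i = 0; i < 12 && s.blocked; i++)
            vnmi_account_exit(&s, 100000000LL, false);  /* 100 ms per exit */
        return s.blocked;   /* 0: unblocked by the timeout path */
    }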


@@ -1,127 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 4 Oct 2017 08:44:43 -0700
Subject: [PATCH] i40e: Fix memory leak related filter programming status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It looks like we weren't correctly placing the pages from buffers that had
been used to return a filter programming status back on the ring. As a
result they were being overwritten and tracking of the pages was lost.

This change works to correct that by incorporating part of
i40e_put_rx_buffer into the programming status handler code. As a result we
should now be correctly placing the pages for those buffers on the
re-allocation list instead of letting them stay in place.

Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status")
Reported-by: Anders K. Pedersen <akp@cohaesio.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Anders K Pedersen <akp@cohaesio.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
(cherry picked from commit 2b9478ffc550f17c6cd8c69057234e91150f5972)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 63 ++++++++++++++++-------------
 1 file changed, 36 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 2194960d5855..391b1878c24b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1042,6 +1042,32 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
return false;
}
+/**
+ * i40e_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *old_buff)
+{
+ struct i40e_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
+
+ new_buff = &rx_ring->rx_bi[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ new_buff->dma = old_buff->dma;
+ new_buff->page = old_buff->page;
+ new_buff->page_offset = old_buff->page_offset;
+ new_buff->pagecnt_bias = old_buff->pagecnt_bias;
+}
+
/**
* i40e_rx_is_programming_status - check for programming status descriptor
* @qw: qword representing status_error_len in CPU ordering
@@ -1076,15 +1102,24 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc,
u64 qw)
{
- u32 ntc = rx_ring->next_to_clean + 1;
+ struct i40e_rx_buffer *rx_buffer;
+ u32 ntc = rx_ring->next_to_clean;
u8 id;
/* fetch, update, and store next to clean */
+ rx_buffer = &rx_ring->rx_bi[ntc++];
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
prefetch(I40E_RX_DESC(rx_ring, ntc));
+ /* place unused page back on the ring */
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
+ rx_ring->rx_stats.page_reuse_count++;
+
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
+
id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
@@ -1643,32 +1678,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
return false;
}
-/**
- * i40e_reuse_rx_page - page flip buffer and store it back on the ring
- * @rx_ring: rx descriptor ring to store buffers on
- * @old_buff: donor buffer to have page reused
- *
- * Synchronizes page for reuse by the adapter
- **/
-static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *old_buff)
-{
- struct i40e_rx_buffer *new_buff;
- u16 nta = rx_ring->next_to_alloc;
-
- new_buff = &rx_ring->rx_bi[nta];
-
- /* update, and store next to alloc */
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- /* transfer page from old buffer to new buffer */
- new_buff->dma = old_buff->dma;
- new_buff->page = old_buff->page;
- new_buff->page_offset = old_buff->page_offset;
- new_buff->pagecnt_bias = old_buff->pagecnt_bias;
-}
-
/**
* i40e_page_is_reusable - check if any reuse is possible
* @page: page struct to check
--
2.14.2
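
The recycling step that was being skipped is small: copy the old
buffer's page bookkeeping into the slot at next_to_alloc and advance
that index with wrap-around, so the page goes back on the ring instead
of being overwritten. A reduced sketch of just that step; the field
names echo the driver's, but the types are toy stand-ins (the real
i40e_reuse_rx_page also carries dma and pagecnt_bias across, as the
hunk above shows):

    #include <stdio.h>

    #define RING_COUNT 4

    struct rx_buf {
        void         *page;          /* backing page (stand-in) */
        unsigned int page_offset;    /* where in the page the data lives */
    };

    struct rx_ring {
        struct rx_buf bufs[RING_COUNT];
        unsigned int  next_to_alloc;
    };

    /* Hand a still-usable page back to the ring instead of losing it. */
    static void reuse_rx_page(struct rx_ring *ring, const struct rx_buf *old)
    {
        unsigned int nta = ring->next_to_alloc;

        ring->bufs[nta] = *old;      /* transfer the page to the new slot */
        ring->next_to_alloc = (nta + 1 < RING_COUNT) ? nta + 1 : 0;
    }

    int main(void)
    {
        struct rx_ring ring = { .next_to_alloc = RING_COUNT - 1 };
        struct rx_buf used = { .page = &ring, .page_offset = 128 };

        reuse_rx_page(&ring, &used);
        printf("next_to_alloc wrapped to %u\n", ring.next_to_alloc);
        return 0;
    }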


@@ -27,7 +27,7 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
 1 file changed, 7 insertions(+), 3 deletions(-)
 diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
-index a4b6ffb61495..c5a5ad8ac00f 100644
+index edb462b0b73b..e626d72868fe 100644
 --- a/net/sctp/ipv6.c
 +++ b/net/sctp/ipv6.c
 @@ -326,8 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,


@@ -1,102 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Wed, 13 Sep 2017 18:42:14 +0800
Subject: [PATCH] EDAC, sb_edac: Don't create a second memory controller if HA1
is not present
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Yi Zhang reported the following failure on a 2-socket Haswell (E5-2603v3)
server (DELL PowerEdge 730xd):

  EDAC sbridge: Some needed devices are missing
  EDAC MC: Removed device 0 for sb_edac.c Haswell SrcID#0_Ha#0: DEV 0000:7f:12.0
  EDAC MC: Removed device 1 for sb_edac.c Haswell SrcID#1_Ha#0: DEV 0000:ff:12.0
  EDAC sbridge: Couldn't find mci handler
  EDAC sbridge: Couldn't find mci handler
  EDAC sbridge: Failed to register device with error -19.

The refactored sb_edac driver creates the IMC1 (the 2nd memory
controller) if any IMC1 device is present. In this case only
HA1_TA of IMC1 was present, but the driver expected to find
HA1/HA1_TM/HA1_TAD[0-3] devices too, leading to the above failure.

The document [1] says the 'E5-2603 v3' CPU has 4 memory channels max. Yi
Zhang inserted one DIMM per channel for each CPU, and did random error
address injection test with this patch:

    4024 addresses fell in TOLM hole area
   12715 addresses fell in CPU_SrcID#0_Ha#0_Chan#0_DIMM#0
   12774 addresses fell in CPU_SrcID#0_Ha#0_Chan#1_DIMM#0
   12798 addresses fell in CPU_SrcID#0_Ha#0_Chan#2_DIMM#0
   12913 addresses fell in CPU_SrcID#0_Ha#0_Chan#3_DIMM#0
   12674 addresses fell in CPU_SrcID#1_Ha#0_Chan#0_DIMM#0
   12686 addresses fell in CPU_SrcID#1_Ha#0_Chan#1_DIMM#0
   12882 addresses fell in CPU_SrcID#1_Ha#0_Chan#2_DIMM#0
   12934 addresses fell in CPU_SrcID#1_Ha#0_Chan#3_DIMM#0
  106400 addresses were injected totally.

The test result shows that all the 4 channels belong to IMC0 per CPU, so
the server really only has one IMC per CPU.

In the 1st page of chapter 2 in datasheet [2], it also says 'E5-2600 v3'
implements either one or two IMCs. For CPUs with one IMC, IMC1 is not
used and should be ignored.

Thus, do not create a second memory controller if the key HA1 is absent.

[1] http://ark.intel.com/products/83349/Intel-Xeon-Processor-E5-2603-v3-15M-Cache-1_60-GHz
[2] https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf

Reported-and-tested-by: Yi Zhang <yizhan@redhat.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Fixes: e2f747b1f42a ("EDAC, sb_edac: Assign EDAC memory controller per h/w controller")
Link: http://lkml.kernel.org/r/20170913104214.7325-1-qiuxu.zhuo@intel.com
[ Massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
(cherry picked from commit 15cc3ae001873845b5d842e212478a6570c7d938)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 drivers/edac/sb_edac.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 80d860cb0746..7a3b201d51df 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -455,6 +455,7 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
static const struct pci_id_descr pci_dev_descr_ibridge[] = {
/* Processor Home Agent */
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0, IMC0) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1, IMC1) },
/* Memory controller */
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0, IMC0) },
@@ -465,7 +466,6 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = {
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0, IMC0) },
/* Optional, mode 2HA */
- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1, IMC1) },
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1, IMC1) },
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1, IMC1) },
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1, IMC1) },
@@ -2260,6 +2260,13 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
next_imc:
sbridge_dev = get_sbridge_dev(bus, dev_descr->dom, multi_bus, sbridge_dev);
if (!sbridge_dev) {
+ /* If the HA1 wasn't found, don't create EDAC second memory controller */
+ if (dev_descr->dom == IMC1 && devno != 1) {
+ edac_dbg(0, "Skip IMC1: %04x:%04x (since HA1 was absent)\n",
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+ pci_dev_put(pdev);
+ return 0;
+ }
if (dev_descr->dom == SOCK)
goto out_imc;
--
2.14.2
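
The table reshuffle makes HA1 the first IMC1 entry probed, so its
absence can veto the whole optional controller before any of its
sub-devices are bound. A toy of that gate; the table contents and the
presence check are made up, modelling Yi Zhang's machine where HA1_TA
exists but HA1 does not:

    #include <stdbool.h>
    #include <stdio.h>

    struct dev_descr { const char *name; int imc; bool key; };

    /* HA1 leads the IMC1 entries so its absence can veto the group. */
    static const struct dev_descr table[] = {
        { "HA0",    0, false },
        { "HA1",    1, true  },     /* key device for the optional IMC1 */
        { "HA0_TA", 0, false },
        { "HA1_TA", 1, false },     /* present even on 1-IMC parts */
    };

    static bool device_present(const struct dev_descr *d)
    {
        return d->imc == 0 || !d->key;  /* only IMC0 devices and HA1_TA */
    }

    int main(void)
    {
        bool imc1_present = false;

        for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
            const struct dev_descr *d = &table[i];

            if (d->key)
                imc1_present = device_present(d);
            if (d->imc == 1 && !imc1_present) {
                printf("skipping %s: HA1 absent, no IMC1\n", d->name);
                continue;
            }
            printf("binding %s\n", d->name);
        }
        return 0;
    }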


@@ -1,45 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dick Kennedy <dick.kennedy@broadcom.com>
Date: Wed, 23 Aug 2017 16:55:31 -0700
Subject: [PATCH] scsi: lpfc: Fix loop mode target discovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver does not discover targets when in loop mode.

The NLP type is correctly set when a fabric connection is detected, but
not for loop. The unknown NLP type means that the driver does not issue
a PRLI when in loop topology, and thus target discovery fails.

Fix by checking the topology during discovery. If it is loop, set the
NLP FC4 type to FCP.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
(cherry picked from commit 2877cbffb79ed121a6bcc5edbe629d3aba36cd29)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 drivers/scsi/lpfc/lpfc_nportdisc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index f74cb0142fd4..95b2b43ac37d 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1724,6 +1724,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
lpfc_nvme_update_localport(vport);
}
+ } else if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
+ ndlp->nlp_fc4_type |= NLP_FC4_FCP;
+
} else if (ndlp->nlp_fc4_type == 0) {
rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID,
0, ndlp->nlp_DID);
--
2.14.2