Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 3 Nov 2019 19:02:32 +0000 (UTC)
From:      Vincenzo Maffione <vmaffione@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r354288 - head/usr.sbin/bhyve
Message-ID:  <201911031902.xA3J2WA1098571@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: vmaffione
Date: Sun Nov  3 19:02:32 2019
New Revision: 354288
URL: https://svnweb.freebsd.org/changeset/base/354288

Log:
  bhyve: add backend rx backpressure to virtio-net
  
  If a VM is flooded with more ingress packets than the guest OS
  can handle, the current virtio-net code will keep reading those
  packets and drop most of them as no space is available in the
  receive queue. This is an undesirable receive livelock, which
  is a waste of CPU and memory resources and potentially opens to
  DoS attacks.
  With this change, virtio-net uses the new netbe_rx_disable()
  function to disable ingress operation in the backend while the
  guest is short on RX buffers. Once the guest makes more buffers
  available to the RX virtqueue, ingress operation is enabled again
  by calling netbe_rx_enable().
  
  Reviewed by:	bryanv, jhb
  MFC after:	2 weeks
  Differential Revision:	https://reviews.freebsd.org/D20987

Modified:
  head/usr.sbin/bhyve/mevent.c
  head/usr.sbin/bhyve/mevent.h
  head/usr.sbin/bhyve/net_backends.c
  head/usr.sbin/bhyve/pci_e82545.c
  head/usr.sbin/bhyve/pci_virtio_net.c

Modified: head/usr.sbin/bhyve/mevent.c
==============================================================================
--- head/usr.sbin/bhyve/mevent.c	Sun Nov  3 18:53:42 2019	(r354287)
+++ head/usr.sbin/bhyve/mevent.c	Sun Nov  3 19:02:32 2019	(r354288)
@@ -321,6 +321,14 @@ mevent_add(int tfd, enum ev_type type,
 	return mevent_add_state(tfd, type, func, param, MEV_ADD);
 }
 
+struct mevent *
+mevent_add_disabled(int tfd, enum ev_type type,
+		    void (*func)(int, enum ev_type, void *), void *param)
+{
+
+	return mevent_add_state(tfd, type, func, param, MEV_ADD_DISABLED);
+}
+
 static int
 mevent_update(struct mevent *evp, int newstate)
 {

Modified: head/usr.sbin/bhyve/mevent.h
==============================================================================
--- head/usr.sbin/bhyve/mevent.h	Sun Nov  3 18:53:42 2019	(r354287)
+++ head/usr.sbin/bhyve/mevent.h	Sun Nov  3 19:02:32 2019	(r354288)
@@ -43,6 +43,9 @@ struct mevent;
 struct mevent *mevent_add(int fd, enum ev_type type, 
 			  void (*func)(int, enum ev_type, void *),
 			  void *param);
+struct mevent *mevent_add_disabled(int fd, enum ev_type type,
+			  void (*func)(int, enum ev_type, void *),
+			  void *param);
 int	mevent_enable(struct mevent *evp);
 int	mevent_disable(struct mevent *evp);
 int	mevent_delete(struct mevent *evp);

Modified: head/usr.sbin/bhyve/net_backends.c
==============================================================================
--- head/usr.sbin/bhyve/net_backends.c	Sun Nov  3 18:53:42 2019	(r354287)
+++ head/usr.sbin/bhyve/net_backends.c	Sun Nov  3 19:02:32 2019	(r354288)
@@ -220,7 +220,7 @@ tap_init(struct net_backend *be, const char *devname,
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 #endif
 
-	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
+	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
 	if (priv->mevp == NULL) {
 		WPRINTF(("Could not register event\n"));
 		goto error;
@@ -432,7 +432,7 @@ netmap_init(struct net_backend *be, const char *devnam
 	priv->cb_param = param;
 	be->fd = priv->nmd->fd;
 
-	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
+	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
 	if (priv->mevp == NULL) {
 		WPRINTF(("Could not register event\n"));
 		return (-1);

Modified: head/usr.sbin/bhyve/pci_e82545.c
==============================================================================
--- head/usr.sbin/bhyve/pci_e82545.c	Sun Nov  3 18:53:42 2019	(r354287)
+++ head/usr.sbin/bhyve/pci_e82545.c	Sun Nov  3 19:02:32 2019	(r354288)
@@ -2351,6 +2351,8 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi,
 		net_genmac(pi, sc->esc_mac.octet);
 	}
 
+	netbe_rx_enable(sc->esc_be);
+
 	/* H/w initiated reset */
 	e82545_reset(sc, 0);
 

Modified: head/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_net.c	Sun Nov  3 18:53:42 2019	(r354287)
+++ head/usr.sbin/bhyve/pci_virtio_net.c	Sun Nov  3 19:02:32 2019	(r354288)
@@ -101,7 +101,6 @@ struct pci_vtnet_softc {
 
 	net_backend_t	*vsc_be;
 
-	int		vsc_rx_ready;
 	int		resetting;	/* protected by tx_mtx */
 
 	uint64_t	vsc_features;	/* negotiated features */
@@ -156,7 +155,6 @@ pci_vtnet_reset(void *vsc)
 		pthread_mutex_lock(&sc->tx_mtx);
 	}
 
-	sc->vsc_rx_ready = 0;
 	sc->rx_merge = 1;
 	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
 
@@ -180,30 +178,29 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 	int len, n;
 	uint16_t idx;
 
-	if (!sc->vsc_rx_ready) {
-		/*
-		 * The rx ring has not yet been set up.
-		 * Drop the packet and try later.
-		 */
-		netbe_rx_discard(sc->vsc_be);
-		return;
-	}
-
-	/*
-	 * Check for available rx buffers
-	 */
 	vq = &sc->vsc_queues[VTNET_RXQ];
-	if (!vq_has_descs(vq)) {
+	for (;;) {
 		/*
-		 * No available rx buffers. Drop the packet and try later.
-		 * Interrupt on empty, if that's negotiated.
+		 * Check for available rx buffers.
 		 */
-		netbe_rx_discard(sc->vsc_be);
-		vq_endchains(vq, /*used_all_avail=*/1);
-		return;
-	}
+		if (!vq_has_descs(vq)) {
+			/* No rx buffers. Enable RX kicks and double check. */
+			vq_kick_enable(vq);
+			if (!vq_has_descs(vq)) {
+				/*
+				 * Still no buffers. Interrupt if needed
+				 * (including for NOTIFY_ON_EMPTY), and
+				 * disable the backend until the next kick.
+				 */
+				vq_endchains(vq, /*used_all_avail=*/1);
+				netbe_rx_disable(sc->vsc_be);
+				return;
+			}
 
-	do {
+			/* More rx buffers found, so keep going. */
+			vq_kick_disable(vq);
+		}
+
 		/*
 		 * Get descriptor chain.
 		 */
@@ -215,7 +212,8 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 		if (len <= 0) {
 			/*
 			 * No more packets (len == 0), or backend errored
-			 * (err < 0). Return unused available buffers.
+			 * (err < 0). Return unused available buffers
+			 * and stop.
 			 */
 			vq_retchain(vq);
 			/* Interrupt if needed/appropriate and stop. */
@@ -225,10 +223,8 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 
 		/* Publish the info to the guest */
 		vq_relchain(vq, idx, (uint32_t)len);
-	} while (vq_has_descs(vq));
+	}
 
-	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
-	vq_endchains(vq, /*used_all_avail=*/1);
 }
 
 /*
@@ -254,13 +250,11 @@ pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
 	struct pci_vtnet_softc *sc = vsc;
 
 	/*
-	 * A qnotify means that the rx process can now begin
+	 * A qnotify means that the rx process can now begin.
 	 */
 	pthread_mutex_lock(&sc->rx_mtx);
-	if (sc->vsc_rx_ready == 0) {
-		sc->vsc_rx_ready = 1;
-		vq_kick_disable(vq);
-	}
+	vq_kick_disable(vq);
+	netbe_rx_enable(sc->vsc_be);
 	pthread_mutex_unlock(&sc->rx_mtx);
 }
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201911031902.xA3J2WA1098571>