Date: Thu, 22 Dec 2011 11:15:10 -0500 From: John Baldwin <jhb@freebsd.org> To: net@freebsd.org Cc: bms@freebsd.org Subject: Deferring inp_freemoptions() to an asychronous task Message-ID: <201112221115.10239.jhb@freebsd.org>
next in thread | raw e-mail | index | archive | help
I have a workload at work where a particular device driver can take a while to 
update its MAC filter table when adding or removing multicast link-layer 
addresses.  One of the ways I've tackled fixing this is to change 
inp_freemoptions() so that it does all of its actual work asychronously in a 
separate task.  Currently it does its work synchronously; however, it can be 
invoked while the associated protocol holds a write lock on its pcbinfo lock 
(e.g. from in_pcbdetach() called from udp_detach()).  This stalls all packet 
reception for that protocol since received packets need a read lock on the 
pcbinfo to lookup the socket associated with a given (ip, port) tuple.  Moving 
the meat of inp_freemoptions() out to a task moves out it from under the 
pcbinfo lock meaning that a driver that takes a while to update its MAC filter 
table no longer stalls receive processing for the entire machine.  The patch 
below is against 8, but I expect it to apply to HEAD as well.  If folks don't 
object I'll port this forward to HEAD.
Index: in_mcast.c
===================================================================
--- in_mcast.c	(revision 225431)
+++ in_mcast.c	(working copy)
@@ -46,6 +46,7 @@
 #include <sys/protosw.h>
 #include <sys/sysctl.h>
 #include <sys/ktr.h>
+#include <sys/taskqueue.h>
 #include <sys/tree.h>
 
 #include <net/if.h>
@@ -144,6 +145,8 @@
 static void	inm_reap(struct in_multi *);
 static struct ip_moptions *
 		inp_findmoptions(struct inpcb *);
+static void	inp_freemoptions_internal(struct ip_moptions *);
+static void	inp_gcmoptions(void *, int);
 static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
 static int	inp_join_group(struct inpcb *, struct sockopt *);
 static int	inp_leave_group(struct inpcb *, struct sockopt *);
@@ -178,6 +181,10 @@
     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
     "Per-interface stack-wide source filters");
 
+static STAILQ_HEAD(, ip_moptions) imo_gc_list =
+    STAILQ_HEAD_INITIALIZER(imo_gc_list);
+static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
+
 /*
  * Inline function which wraps assertions for a valid ifp.
  * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
@@ -1517,18 +1524,30 @@
 }
 
 /*
- * Discard the IP multicast options (and source filters).
+ * Discard the IP multicast options (and source filters).  To minimize
+ * the amount of work done while holding locks such as the INP's
+ * pcbinfo lock (which is used in the receive path), the free
+ * operation is performed asynchronously in a separate task.
  *
  * SMPng: NOTE: assumes INP write lock is held.
  */
 void
 inp_freemoptions(struct ip_moptions *imo)
 {
+
+	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
+	IN_MULTI_LOCK();
+	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
+	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
+	IN_MULTI_UNLOCK();
+}
+
+static void
+inp_freemoptions_internal(struct ip_moptions *imo)
+{
 	struct in_mfilter	*imf;
 	size_t			 idx, nmships;
 
-	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
-
 	nmships = imo->imo_num_memberships;
 	for (idx = 0; idx < nmships; ++idx) {
 		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
@@ -1545,6 +1564,22 @@
 	free(imo, M_IPMOPTS);
 }
 
+static void
+inp_gcmoptions(void *context, int pending)
+{
+	struct ip_moptions *imo;
+
+	IN_MULTI_LOCK();
+	while (!STAILQ_EMPTY(&imo_gc_list)) {
+		imo = STAILQ_FIRST(&imo_gc_list);
+		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
+		IN_MULTI_UNLOCK();
+		inp_freemoptions_internal(imo);
+		IN_MULTI_LOCK();
+	}
+	IN_MULTI_UNLOCK();
+}
+
 /*
  * Atomically get source filters on a socket for an IPv4 multicast group.
  * Called with INP lock held; returns with lock released.
Index: ip_var.h
===================================================================
--- ip_var.h	(revision 225431)
+++ ip_var.h	(working copy)
@@ -93,6 +93,7 @@
 	u_short	imo_max_memberships;	/* max memberships this socket */
 	struct	in_multi **imo_membership;	/* group memberships */
 	struct	in_mfilter *imo_mfilters;	/* source filters */
+	STAILQ_ENTRY(ip_moptions) imo_link;
 };
 
 struct	ipstat {
-- 
John Baldwin
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201112221115.10239.jhb>
