Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 25 Jan 2020 08:57:26 +0000 (UTC)
From:      "Jason A. Harmening" <jah@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r357110 - in head/sys: kern sys
Message-ID:  <202001250857.00P8vQ4H023733@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jah
Date: Sat Jan 25 08:57:26 2020
New Revision: 357110
URL: https://svnweb.freebsd.org/changeset/base/357110

Log:
  Implement cycle-detecting garbage collector for AF_UNIX sockets
  
  The existing AF_UNIX socket garbage collector destroys any socket
  which may potentially be in a cycle, as indicated by its file reference
  count being equal to its enqueue count. However, this can produce false
  positives for in-flight sockets which aren't part of a cycle but are
  part of one or more SCM_RIGHTS mssages and which have been closed
  on the sending side. If the garbage collector happens to run at
  exactly the wrong time, destruction of these sockets will render them
  unusable on the receiving side, such that no previously-written data
  may be read.
  
  This change rewrites the garbage collector to precisely detect cycles:
  
  1. The existing check of msgcount==f_count is still used to determine
     whether the socket is potentially in a cycle.
  2. The socket is now placed on a local "dead list", which is used to
     reduce iteration time (and therefore contention on the global
     unp_link_rwlock).
  3. The first pass through the dead list removes each potentially-dead
     socket's outgoing references from the graph of potentially-dead
     sockets, using a gc-specific copy of the original reference count.
  4. The second series of passes through the dead list removes from the
     list any socket whose remaining gc refcount is non-zero, as this
     indicates the socket is actually accessible outside of any possible
     cycle.  Iteration is repeated until no further sockets are removed
     from the dead list.
  5. Sockets remaining in the dead list are destroyed as before.
  
  PR:		227285
  Submitted by:	jan.kokemueller@gmail.com (prior version)
  Reviewed by:	markj
  Differential Revision:	https://reviews.freebsd.org/D23142

Modified:
  head/sys/kern/uipc_usrreq.c
  head/sys/sys/unpcb.h

Modified: head/sys/kern/uipc_usrreq.c
==============================================================================
--- head/sys/kern/uipc_usrreq.c	Sat Jan 25 05:52:31 2020	(r357109)
+++ head/sys/kern/uipc_usrreq.c	Sat Jan 25 08:57:26 2020	(r357110)
@@ -260,7 +260,7 @@ static struct mtx	unp_defers_lock;
 #define	UNP_LINK_LOCK_INIT()		rw_init(&unp_link_rwlock,	\
 					    "unp_link_rwlock")
 
-#define	UNP_LINK_LOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
+#define	UNP_LINK_LOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_LOCKED)
 #define	UNP_LINK_UNLOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
 					    RA_UNLOCKED)
@@ -778,6 +778,8 @@ uipc_detach(struct socket *so)
 
 	UNP_LINK_WLOCK();
 	LIST_REMOVE(unp, unp_link);
+	if (unp->unp_gcflag & UNPGC_DEAD)
+		LIST_REMOVE(unp, unp_dead);
 	unp->unp_gencnt = ++unp_gencnt;
 	--unp_count;
 	UNP_LINK_WUNLOCK();
@@ -2481,50 +2483,61 @@ unp_externalize_fp(struct file *fp)
  * synchronization.
  */
 static int	unp_marked;
-static int	unp_unreachable;
 
 static void
-unp_accessable(struct filedescent **fdep, int fdcount)
+unp_remove_dead_ref(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *unp;
 	struct file *fp;
 	int i;
 
+	/*
+	 * This function can only be called from the gc task.
+	 */
+	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
+	    ("%s: not on gc callout", __func__));
+	UNP_LINK_LOCK_ASSERT();
+
 	for (i = 0; i < fdcount; i++) {
 		fp = fdep[i]->fde_file;
 		if ((unp = fptounp(fp)) == NULL)
 			continue;
-		if (unp->unp_gcflag & UNPGC_REF)
+		if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 			continue;
-		unp->unp_gcflag &= ~UNPGC_DEAD;
-		unp->unp_gcflag |= UNPGC_REF;
-		unp_marked++;
+		unp->unp_gcrefs--;
 	}
 }
 
 static void
-unp_gc_process(struct unpcb *unp)
+unp_restore_undead_ref(struct filedescent **fdep, int fdcount)
 {
-	struct socket *so, *soa;
+	struct unpcb *unp;
 	struct file *fp;
+	int i;
 
-	/* Already processed. */
-	if (unp->unp_gcflag & UNPGC_SCANNED)
-		return;
-	fp = unp->unp_file;
-
 	/*
-	 * Check for a socket potentially in a cycle.  It must be in a
-	 * queue as indicated by msgcount, and this must equal the file
-	 * reference count.  Note that when msgcount is 0 the file is NULL.
+	 * This function can only be called from the gc task.
 	 */
-	if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp &&
-	    unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) {
-		unp->unp_gcflag |= UNPGC_DEAD;
-		unp_unreachable++;
-		return;
+	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
+	    ("%s: not on gc callout", __func__));
+	UNP_LINK_LOCK_ASSERT();
+
+	for (i = 0; i < fdcount; i++) {
+		fp = fdep[i]->fde_file;
+		if ((unp = fptounp(fp)) == NULL)
+			continue;
+		if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
+			continue;
+		unp->unp_gcrefs++;
+		unp_marked++;
 	}
+}
 
+static void
+unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int))
+{
+	struct socket *so, *soa;
+
 	so = unp->unp_socket;
 	SOCK_LOCK(so);
 	if (SOLISTENING(so)) {
@@ -2535,7 +2548,7 @@ unp_gc_process(struct unpcb *unp)
 			if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
 				continue;
 			SOCKBUF_LOCK(&soa->so_rcv);
-			unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+			unp_scan(soa->so_rcv.sb_mb, op);
 			SOCKBUF_UNLOCK(&soa->so_rcv);
 		}
 	} else {
@@ -2544,12 +2557,11 @@ unp_gc_process(struct unpcb *unp)
 		 */
 		if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
 			SOCKBUF_LOCK(&so->so_rcv);
-			unp_scan(so->so_rcv.sb_mb, unp_accessable);
+			unp_scan(so->so_rcv.sb_mb, op);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 		}
 	}
 	SOCK_UNLOCK(so);
-	unp->unp_gcflag |= UNPGC_SCANNED;
 }
 
 static int unp_recycled;
@@ -2560,67 +2572,115 @@ static int unp_taskcount;
 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
     "Number of times the garbage collector has run.");
 
+SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, 
+    "Number of active local sockets.");
+
 static void
 unp_gc(__unused void *arg, int pending)
 {
 	struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 				    NULL };
 	struct unp_head **head;
+	struct unp_head unp_deadhead;	/* List of potentially-dead sockets. */
 	struct file *f, **unref;
-	struct unpcb *unp;
-	int i, total;
+	struct unpcb *unp, *unptmp;
+	int i, total, unp_unreachable;
 
+	LIST_INIT(&unp_deadhead);
 	unp_taskcount++;
 	UNP_LINK_RLOCK();
 	/*
-	 * First clear all gc flags from previous runs, apart from
-	 * UNPGC_IGNORE_RIGHTS.
+	 * First determine which sockets may be in cycles.
 	 */
+	unp_unreachable = 0;
+
 	for (head = heads; *head != NULL; head++)
-		LIST_FOREACH(unp, *head, unp_link)
-			unp->unp_gcflag =
-			    (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);
+		LIST_FOREACH(unp, *head, unp_link) {
 
+			KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0,
+			    ("%s: unp %p has unexpected gc flags 0x%x",
+			    __func__, unp, (unsigned int)unp->unp_gcflag));
+
+			f = unp->unp_file;
+
+			/*
+			 * Check for an unreachable socket potentially in a
+			 * cycle.  It must be in a queue as indicated by
+			 * msgcount, and this must equal the file reference
+			 * count.  Note that when msgcount is 0 the file is
+			 * NULL.
+			 */
+			if (f != NULL && unp->unp_msgcount != 0 &&
+			    f->f_count == unp->unp_msgcount) {
+				LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead);
+				unp->unp_gcflag |= UNPGC_DEAD;
+				unp->unp_gcrefs = unp->unp_msgcount;
+				unp_unreachable++;
+			}
+		}
+
 	/*
-	 * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
-	 * is reachable all of the sockets it references are reachable.
+	 * Scan all sockets previously marked as potentially being in a cycle
+	 * and remove the references each socket holds on any UNPGC_DEAD
+	 * sockets in its queue.  After this step, all remaining references on
+	 * sockets marked UNPGC_DEAD should not be part of any cycle.
+	 */
+	LIST_FOREACH(unp, &unp_deadhead, unp_dead)
+		unp_gc_scan(unp, unp_remove_dead_ref);
+
+	/*
+	 * If a socket still has a non-negative refcount, it cannot be in a
+	 * cycle.  In this case increment refcount of all children iteratively.
 	 * Stop the scan once we do a complete loop without discovering
 	 * a new reachable socket.
 	 */
 	do {
-		unp_unreachable = 0;
 		unp_marked = 0;
-		for (head = heads; *head != NULL; head++)
-			LIST_FOREACH(unp, *head, unp_link)
-				unp_gc_process(unp);
+		LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp)
+			if (unp->unp_gcrefs > 0) {
+				unp->unp_gcflag &= ~UNPGC_DEAD;
+				LIST_REMOVE(unp, unp_dead);
+				KASSERT(unp_unreachable > 0,
+				    ("%s: unp_unreachable underflow.",
+				    __func__));
+				unp_unreachable--;
+				unp_gc_scan(unp, unp_restore_undead_ref);
+			}
 	} while (unp_marked);
+
 	UNP_LINK_RUNLOCK();
+
 	if (unp_unreachable == 0)
 		return;
 
 	/*
-	 * Allocate space for a local list of dead unpcbs.
+	 * Allocate space for a local array of dead unpcbs.
+	 * TODO: can this path be simplified by instead using the local
+	 * dead list at unp_deadhead, after taking out references
+	 * on the file object and/or unpcb and dropping the link lock?
 	 */
 	unref = malloc(unp_unreachable * sizeof(struct file *),
 	    M_TEMP, M_WAITOK);
 
 	/*
 	 * Iterate looking for sockets which have been specifically marked
-	 * as as unreachable and store them locally.
+	 * as unreachable and store them locally.
 	 */
 	UNP_LINK_RLOCK();
-	for (total = 0, head = heads; *head != NULL; head++)
-		LIST_FOREACH(unp, *head, unp_link)
-			if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
-				f = unp->unp_file;
-				if (unp->unp_msgcount == 0 || f == NULL ||
-				    f->f_count != unp->unp_msgcount ||
-				    !fhold(f))
-					continue;
-				unref[total++] = f;
-				KASSERT(total <= unp_unreachable,
-				    ("unp_gc: incorrect unreachable count."));
-			}
+	total = 0;
+	LIST_FOREACH(unp, &unp_deadhead, unp_dead) {
+		KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0,
+		    ("%s: unp %p not marked UNPGC_DEAD", __func__, unp));
+		unp->unp_gcflag &= ~UNPGC_DEAD;
+		f = unp->unp_file;
+		if (unp->unp_msgcount == 0 || f == NULL ||
+		    f->f_count != unp->unp_msgcount ||
+		    !fhold(f))
+			continue;
+		unref[total++] = f;
+		KASSERT(total <= unp_unreachable,
+		    ("%s: incorrect unreachable count.", __func__));
+	}
 	UNP_LINK_RUNLOCK();
 
 	/*

Modified: head/sys/sys/unpcb.h
==============================================================================
--- head/sys/sys/unpcb.h	Sat Jan 25 05:52:31 2020	(r357109)
+++ head/sys/sys/unpcb.h	Sat Jan 25 08:57:26 2020	(r357110)
@@ -86,7 +86,9 @@ struct unpcb {
 	unp_gen_t unp_gencnt;		/* generation count of this instance */
 	struct	file *unp_file;		/* back-pointer to file for gc. */
 	u_int	unp_msgcount;		/* references from message queue */
+	u_int	unp_gcrefs;		/* garbage collector refcount */
 	ino_t	unp_ino;		/* fake inode number */
+	LIST_ENTRY(unpcb) unp_dead;	/* link in dead list */
 } __aligned(CACHE_LINE_SIZE);
 
 /*
@@ -113,10 +115,8 @@ struct unpcb {
 /*
  * Flags in unp_gcflag.
  */
-#define	UNPGC_REF			0x1	/* unpcb has external ref. */
-#define	UNPGC_DEAD			0x2	/* unpcb might be dead. */
-#define	UNPGC_SCANNED			0x4	/* Has been scanned. */
-#define	UNPGC_IGNORE_RIGHTS		0x8	/* Attached rights are freed */
+#define	UNPGC_DEAD			0x1	/* unpcb might be dead. */
+#define	UNPGC_IGNORE_RIGHTS		0x2	/* Attached rights are freed */
 
 #define	sotounpcb(so)	((struct unpcb *)((so)->so_pcb))
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202001250857.00P8vQ4H023733>