Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 8 Dec 2008 20:18:50 +0000 (UTC)
From:      Robert Watson <rwatson@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r185773 - head/sys/netinet
Message-ID:  <200812082018.mB8KIofH069620@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rwatson
Date: Mon Dec  8 20:18:50 2008
New Revision: 185773
URL: http://svn.freebsd.org/changeset/base/185773

Log:
  Add a reference count to struct inpcb, which may be explicitly
  incremented using in_pcbref(), and decremented using in_pcbfree()
  or in_pcbrele().  Protocols using only current in_pcballoc() and
  in_pcbfree() calls will see the same semantics, but it is now
  possible for TCP to call in_pcbref() and in_pcbrele() to prevent
  an inpcb from being freed when both tcbinfo and per-inpcb locks
  are released.  This makes it possible to safely transition from
  holding only the inpcb lock to both tcbinfo and inpcb lock
  without re-looking up a connection in the input path, timer
  path, etc.
  
  Notice that in_pcbrele() does not unlock the connection after
  decrementing the refcount, if the connection remains, so that
  the caller can continue to use it; in_pcbrele() returns a flag
  indicating whether or not the inpcb pointer is still valid, and
  in_pcbfree() is now a simple wrapper around in_pcbrele().
  
  MFC after:	1 month
  Discussed with:	bz, kmacy
  Reviewed by:	bz, gnn, kmacy
  Tested by:	kmacy

Modified:
  head/sys/netinet/in_pcb.c
  head/sys/netinet/in_pcb.h

Modified: head/sys/netinet/in_pcb.c
==============================================================================
--- head/sys/netinet/in_pcb.c	Mon Dec  8 18:39:59 2008	(r185772)
+++ head/sys/netinet/in_pcb.c	Mon Dec  8 20:18:50 2008	(r185773)
@@ -1,7 +1,7 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993, 1995
  *	The Regents of the University of California.
- * Copyright (c) 2007 Robert N. M. Watson
+ * Copyright (c) 2007-2008 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -238,6 +238,7 @@ in_pcballoc(struct socket *so, struct in
 #endif
 	INP_WLOCK(inp);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+	inp->inp_refcount = 1;	/* Reference from the inpcbinfo */
 
 #if defined(IPSEC) || defined(MAC)
 out:
@@ -872,14 +873,10 @@ in_pcbdisconnect(struct inpcb *inp)
 }
 
 /*
- * Historically, in_pcbdetach() included the functionality now found in
- * in_pcbfree() and in_pcbdrop().  They are now broken out to reflect the
- * more complex life cycle of TCP.
- *
- * in_pcbdetach() is responsibe for disconnecting the socket from an inpcb.
+ * in_pcbdetach() is responsible for disassociating a socket from an inpcb.
  * For most protocols, this will be invoked immediately prior to calling
- * in_pcbfree().  However, for TCP the inpcb may significantly outlive the
- * socket, in which case in_pcbfree() may be deferred.
+ * in_pcbfree().  However, with TCP the inpcb may significantly outlive the
+ * socket, in which case in_pcbfree() is deferred.
  */
 void
 in_pcbdetach(struct inpcb *inp)
@@ -892,15 +889,17 @@ in_pcbdetach(struct inpcb *inp)
 }
 
 /*
- * in_pcbfree() is responsible for freeing an already-detached inpcb, as well
- * as removing it from any global inpcb lists it might be on.
+ * in_pcbfree_internal() frees an inpcb that has been detached from its
+ * socket, and whose reference count has reached 0.  It will also remove the
+ * inpcb from any global lists it might remain on.
  */
-void
-in_pcbfree(struct inpcb *inp)
+static void
+in_pcbfree_internal(struct inpcb *inp)
 {
 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
 
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
+	KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__));
 
 	INP_INFO_WLOCK_ASSERT(ipi);
 	INP_WLOCK_ASSERT(inp);
@@ -932,6 +931,77 @@ in_pcbfree(struct inpcb *inp)
 }
 
 /*
+ * in_pcbref() bumps the reference count on an inpcb in order to maintain
+ * stability of an inpcb pointer despite the inpcb lock being released.  This
+ * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
+ * but where the inpcb lock is already held.
+ *
+ * While the inpcb will not be freed, releasing the inpcb lock means that the
+ * connection's state may change, so the caller should be careful to
+ * revalidate any cached state on reacquiring the lock.  Drop the reference
+ * using in_pcbrele().
+ */
+void
+in_pcbref(struct inpcb *inp)
+{
+
+	INP_WLOCK_ASSERT(inp);
+
+	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
+
+	inp->inp_refcount++;
+}
+
+/*
+ * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
+ * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
+ * return 0 if the inpcb remains valid, in which case the inpcb lock is still
+ * held, or 1 if the last reference was dropped and the inpcb has been freed.
+ */
+int
+in_pcbrele(struct inpcb *inp)
+{
+#ifdef INVARIANTS
+	struct inpcbinfo *ipi = inp->inp_pcbinfo;
+#endif
+
+	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
+
+	INP_INFO_WLOCK_ASSERT(ipi);
+	INP_WLOCK_ASSERT(inp);
+
+	inp->inp_refcount--;
+	if (inp->inp_refcount > 0)
+		return (0);
+	in_pcbfree_internal(inp);
+	return (1);
+}
+
+/*
+ * Unconditionally schedule an inpcb to be freed by decrementing its
+ * reference count, which should occur only after the inpcb has been detached
+ * from its socket.  If another thread holds a temporary reference (acquired
+ * using in_pcbref()) then the free is deferred until that reference is
+ * released using in_pcbrele(), but the inpcb is still unlocked.
+ */
+void
+in_pcbfree(struct inpcb *inp)
+{
+#ifdef INVARIANTS
+	struct inpcbinfo *ipi = inp->inp_pcbinfo;
+#endif
+
+	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL",
+	    __func__));
+
+	INP_INFO_WLOCK_ASSERT(ipi);
+	INP_WLOCK_ASSERT(inp);
+
+	if (!in_pcbrele(inp))
+		INP_WUNLOCK(inp);
+}
+
+/*
  * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
  * port reservation, and preventing it from being returned by inpcb lookups.
  *

Modified: head/sys/netinet/in_pcb.h
==============================================================================
--- head/sys/netinet/in_pcb.h	Mon Dec  8 18:39:59 2008	(r185772)
+++ head/sys/netinet/in_pcb.h	Mon Dec  8 20:18:50 2008	(r185773)
@@ -170,6 +170,7 @@ struct inpcb {
 	u_char	inp_ip_p;		/* (c) protocol proto */
 	u_char	inp_ip_minttl;		/* (i) minimum TTL or drop */
 	uint32_t inp_ispare1;		/* (x) connection id / queue id */
+	u_int	inp_refcount;		/* (i) refcount */
 	void	*inp_pspare[2];		/* (x) rtentry / general use */
 
 	/* Local and foreign ports, local and foreign addr. */
@@ -478,7 +479,9 @@ struct inpcb *
 	    struct in_addr, u_int, int, struct ifnet *);
 void	in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
 	    int, struct inpcb *(*)(struct inpcb *, int));
+void	in_pcbref(struct inpcb *);
 void	in_pcbrehash(struct inpcb *);
+int	in_pcbrele(struct inpcb *);
 void	in_pcbsetsolabel(struct socket *so);
 int	in_getpeeraddr(struct socket *so, struct sockaddr **nam);
 int	in_getsockaddr(struct socket *so, struct sockaddr **nam);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200812082018.mB8KIofH069620>