Date: Thu, 4 Sep 2014 19:09:08 +0000 (UTC) From: John Baldwin <jhb@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r271119 - head/sys/netinet Message-ID: <201409041909.s84J98oC093318@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jhb Date: Thu Sep 4 19:09:08 2014 New Revision: 271119 URL: http://svnweb.freebsd.org/changeset/base/271119 Log: In tcp_input(), don't acquire the pcbinfo global write lock for SYN packets targeting a listening socket. Permit to reduce TCP input processing starvation in context of high SYN load (e.g. short-lived TCP connections or SYN flood). Submitted by: Julien Charbon <jcharbon@verisign.com> Reviewed by: adrian, hiren, jhb, Mike Bentkofsky Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_syncache.c Modified: head/sys/netinet/tcp_input.c ============================================================================== --- head/sys/netinet/tcp_input.c Thu Sep 4 18:54:01 2014 (r271118) +++ head/sys/netinet/tcp_input.c Thu Sep 4 19:09:08 2014 (r271119) @@ -748,12 +748,12 @@ tcp_input(struct mbuf **mp, int *offp, i /* * Locate pcb for segment; if we're likely to add or remove a - * connection then first acquire pcbinfo lock. There are two cases + * connection then first acquire pcbinfo lock. There are three cases * where we might discover later we need a write lock despite the - * flags: ACKs moving a connection out of the syncache, and ACKs for - * a connection in TIMEWAIT. + * flags: ACKs moving a connection out of the syncache, ACKs for a + * connection in TIMEWAIT and SYNs not targeting a listening socket. */ - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) { + if ((thflags & (TH_FIN | TH_RST)) != 0) { INP_INFO_WLOCK(&V_tcbinfo); ti_locked = TI_WLOCKED; } else @@ -982,10 +982,11 @@ relocked: * now be in TIMEWAIT. */ #ifdef INVARIANTS - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) + if ((thflags & (TH_FIN | TH_RST)) != 0) INP_INFO_WLOCK_ASSERT(&V_tcbinfo); #endif - if (tp->t_state != TCPS_ESTABLISHED) { + if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || + (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) { if (ti_locked == TI_UNLOCKED) { if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); @@ -1026,17 +1027,13 @@ relocked: /* * When the socket is accepting connections (the INPCB is in LISTEN * state) we look into the SYN cache if this is a new connection - * attempt or the completion of a previous one. Because listen - * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be - * held in this case. + * attempt or the completion of a previous one. */ if (so->so_options & SO_ACCEPTCONN) { struct in_conninfo inc; KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but " "tp not listening", __func__)); - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - bzero(&inc, sizeof(inc)); #ifdef INET6 if (isipv6) { @@ -1059,6 +1056,8 @@ relocked: * socket appended to the listen queue in SYN_RECEIVED state. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1339,8 +1338,12 @@ relocked: syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL); /* * Entry added to syncache and mbuf consumed. - * Everything already unlocked by syncache_add(). + * Only the listen socket is unlocked by syncache_add(). */ + if (ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); } else if (tp->t_state == TCPS_LISTEN) { Modified: head/sys/netinet/tcp_syncache.c ============================================================================== --- head/sys/netinet/tcp_syncache.c Thu Sep 4 18:54:01 2014 (r271118) +++ head/sys/netinet/tcp_syncache.c Thu Sep 4 19:09:08 2014 (r271119) @@ -1118,7 +1118,6 @@ syncache_add(struct in_conninfo *inc, st struct syncache scs; struct ucred *cred; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); /* listen socket */ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN, ("%s: unexpected tcp flags", __func__)); @@ -1149,13 +1148,11 @@ syncache_add(struct in_conninfo *inc, st #ifdef MAC if (mac_syncache_init(&maclabel) != 0) { INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); goto done; } else mac_syncache_create(maclabel, inp); #endif INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); /* * Remember the IP options, if any.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201409041909.s84J98oC093318>