From owner-p4-projects@FreeBSD.ORG Mon Sep 8 04:54:07 2014 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 70A3B63C; Mon, 8 Sep 2014 04:54:07 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 9CF95B0C for ; Mon, 8 Sep 2014 04:52:31 +0000 (UTC) Received: from skunkworks.freebsd.org (skunkworks.freebsd.org [IPv6:2001:1900:2254:2068::682:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 5C3A91038 for ; Mon, 8 Sep 2014 04:52:31 +0000 (UTC) Received: from skunkworks.freebsd.org ([127.0.1.74]) by skunkworks.freebsd.org (8.14.9/8.14.9) with ESMTP id s884qVKO029915 for ; Mon, 8 Sep 2014 04:52:31 GMT (envelope-from jhb@freebsd.org) Received: (from perforce@localhost) by skunkworks.freebsd.org (8.14.9/8.14.9/Submit) id s884qVrQ029912 for perforce@freebsd.org; Mon, 8 Sep 2014 04:52:31 GMT (envelope-from jhb@freebsd.org) Date: Mon, 8 Sep 2014 04:52:31 GMT Message-Id: <201409080452.s884qVrQ029912@skunkworks.freebsd.org> X-Authentication-Warning: skunkworks.freebsd.org: perforce set sender to jhb@freebsd.org using -f From: John Baldwin Subject: PERFORCE change 1199831 for review To: Perforce Change Reviews Precedence: bulk X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.18-1 List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 08 Sep 2014 04:54:07 -0000 http://p4web.freebsd.org/@@1199831?ac=10 Change 1199831 by jhb@jhb_ralph on 2014/09/04 18:29:43 Don't hold a write lock on the pcbinfo for SYN packets. Submitted by: Julien Charbon Affected files ... .. //depot/projects/smpng/sys/netinet/tcp_input.c#163 edit .. //depot/projects/smpng/sys/netinet/tcp_syncache.c#98 edit Differences ... ==== //depot/projects/smpng/sys/netinet/tcp_input.c#163 (text+ko) ==== @@ -748,12 +748,12 @@ /* * Locate pcb for segment; if we're likely to add or remove a - * connection then first acquire pcbinfo lock. There are two cases + * connection then first acquire pcbinfo lock. There are three cases * where we might discover later we need a write lock despite the - * flags: ACKs moving a connection out of the syncache, and ACKs for - * a connection in TIMEWAIT. + * flags: ACKs moving a connection out of the syncache, ACKs for a + * connection in TIMEWAIT and SYNs not targeting a listening socket. */ - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) { + if ((thflags & (TH_FIN | TH_RST)) != 0) { INP_INFO_WLOCK(&V_tcbinfo); ti_locked = TI_WLOCKED; } else @@ -982,10 +982,11 @@ * now be in TIMEWAIT. */ #ifdef INVARIANTS - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) + if ((thflags & (TH_FIN | TH_RST)) != 0) INP_INFO_WLOCK_ASSERT(&V_tcbinfo); #endif - if (tp->t_state != TCPS_ESTABLISHED) { + if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || + (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) { if (ti_locked == TI_UNLOCKED) { if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); @@ -1026,17 +1027,13 @@ /* * When the socket is accepting connections (the INPCB is in LISTEN * state) we look into the SYN cache if this is a new connection - * attempt or the completion of a previous one. Because listen - * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be - * held in this case. + * attempt or the completion of a previous one. */ if (so->so_options & SO_ACCEPTCONN) { struct in_conninfo inc; KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but " "tp not listening", __func__)); - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - bzero(&inc, sizeof(inc)); #ifdef INET6 if (isipv6) { @@ -1059,6 +1056,8 @@ * socket appended to the listen queue in SYN_RECEIVED state. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1339,8 +1338,12 @@ syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL); /* * Entry added to syncache and mbuf consumed. - * Everything already unlocked by syncache_add(). + * Only the listen socket is unlocked by syncache_add(). */ + if (ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); } else if (tp->t_state == TCPS_LISTEN) { ==== //depot/projects/smpng/sys/netinet/tcp_syncache.c#98 (text+ko) ==== @@ -1118,7 +1118,6 @@ struct syncache scs; struct ucred *cred; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); /* listen socket */ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN, ("%s: unexpected tcp flags", __func__)); @@ -1149,13 +1148,11 @@ #ifdef MAC if (mac_syncache_init(&maclabel) != 0) { INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); goto done; } else mac_syncache_create(maclabel, inp); #endif INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); /* * Remember the IP options, if any.