From owner-svn-src-stable-10@FreeBSD.ORG  Wed May  7 07:53:25 2014
Return-Path: <owner-svn-src-stable-10@FreeBSD.ORG>
Delivered-To: svn-src-stable-10@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org
 [IPv6:2001:1900:2254:206a::19:1])
 (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits))
 (No client certificate requested)
 by hub.freebsd.org (Postfix) with ESMTPS id 43FE4BAD;
 Wed,  7 May 2014 07:53:25 +0000 (UTC)
Received: from svn.freebsd.org (svn.freebsd.org
 [IPv6:2001:1900:2254:2068::e6a:0])
 (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
 (Client did not present a certificate)
 by mx1.freebsd.org (Postfix) with ESMTPS id 2FCBB2A3;
 Wed,  7 May 2014 07:53:25 +0000 (UTC)
Received: from svn.freebsd.org ([127.0.1.70])
 by svn.freebsd.org (8.14.8/8.14.8) with ESMTP id s477rPqf029444;
 Wed, 7 May 2014 07:53:25 GMT (envelope-from trasz@svn.freebsd.org)
Received: (from trasz@localhost)
 by svn.freebsd.org (8.14.8/8.14.8/Submit) id s477rPTP029443;
 Wed, 7 May 2014 07:53:25 GMT (envelope-from trasz@svn.freebsd.org)
Message-Id: <201405070753.s477rPTP029443@svn.freebsd.org>
From: Edward Tomasz Napierala <trasz@FreeBSD.org>
Date: Wed, 7 May 2014 07:53:25 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
 svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject: svn commit: r265524 - stable/10/sys/dev/iscsi
X-SVN-Group: stable-10
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
X-BeenThere: svn-src-stable-10@freebsd.org
X-Mailman-Version: 2.1.18
Precedence: list
List-Id: SVN commit messages for only the 10-stable src tree
 <svn-src-stable-10.freebsd.org>
List-Unsubscribe: <https://lists.freebsd.org/mailman/options/svn-src-stable-10>, 
 <mailto:svn-src-stable-10-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/svn-src-stable-10/>
List-Post: <mailto:svn-src-stable-10@freebsd.org>
List-Help: <mailto:svn-src-stable-10-request@freebsd.org?subject=help>
List-Subscribe: <https://lists.freebsd.org/mailman/listinfo/svn-src-stable-10>, 
 <mailto:svn-src-stable-10-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Wed, 07 May 2014 07:53:25 -0000

Author: trasz
Date: Wed May  7 07:53:24 2014
New Revision: 265524
URL: http://svnweb.freebsd.org/changeset/base/265524

Log:
  MFC r264348 by mav@:
  
  Improve use of socket buffer upcalls.
  
  Use soreadable()/sowriteable() in socket upcalls to avoid extra wakeups
  until we have enough data to read or space to write.
  
  Increase partial receive len from 1K to 128K to not wake up on every
  received packet.
  
  This significantly reduces lock contention and CPU usage and improves
  throughput for large I/Os on NICs without TSO and LRO.
  
  MFC r264552 by mav@:
  
  Close a race in the older code that caused connections to get stuck
  after r264348.
  
  Sponsored by:	iXsystems, Inc.

Modified:
  stable/10/sys/dev/iscsi/icl.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/iscsi/icl.c
==============================================================================
--- stable/10/sys/dev/iscsi/icl.c	Wed May  7 07:48:32 2014	(r265523)
+++ stable/10/sys/dev/iscsi/icl.c	Wed May  7 07:53:24 2014	(r265524)
@@ -45,6 +45,7 @@
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/module.h>
+#include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
@@ -67,7 +68,7 @@ static int coalesce = 1;
 TUNABLE_INT("kern.icl.coalesce", &coalesce);
 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
     &coalesce, 1, "Try to coalesce PDUs before sending");
-static int partial_receive_len = 1 * 1024; /* XXX: More? */
+static int partial_receive_len = 128 * 1024;
 TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len);
 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
     &partial_receive_len, 1 * 1024, "Minimum read size for partially received "
@@ -750,12 +751,19 @@ icl_receive_thread(void *arg)
 			break;
 		}
 
+		/*
+		 * Set the low watermark, to be checked by
+		 * soreadable() in icl_soupcall_receive()
+		 * to avoid unnecessary wakeups until there
+		 * is enough data received to read the PDU.
+		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		available = so->so_rcv.sb_cc;
 		if (available < ic->ic_receive_len) {
 			so->so_rcv.sb_lowat = ic->ic_receive_len;
 			cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
-		}
+		} else
+			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
 		SOCKBUF_UNLOCK(&so->so_rcv);
 
 		icl_conn_receive_pdus(ic, available);
@@ -772,6 +780,9 @@ icl_soupcall_receive(struct socket *so, 
 {
 	struct icl_conn *ic;
 
+	if (!soreadable(so))
+		return (SU_OK);
+
 	ic = arg;
 	cv_signal(&ic->ic_receive_cv);
 	return (SU_OK);
@@ -854,10 +865,10 @@ icl_conn_send_pdus(struct icl_conn *ic, 
 	available = sbspace(&so->so_snd);
 
 	/*
-	 * Notify the socket layer that it doesn't need to call
-	 * send socket upcall for the time being.
+	 * Notify the socket upcall that we don't need wakeups
+	 * for the time being.
 	 */
-	so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
+	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	while (!STAILQ_EMPTY(queue)) {
@@ -866,21 +877,26 @@ icl_conn_send_pdus(struct icl_conn *ic, 
 		request = STAILQ_FIRST(queue);
 		size = icl_pdu_size(request);
 		if (available < size) {
-#if 1
-			ICL_DEBUG("no space to send; "
-			    "have %zd, need %zd",
-			    available, size);
-#endif
 
 			/*
-			 * Set the low watermark on the socket,
+			 * Set the low watermark, to be checked by
+			 * sowriteable() in icl_soupcall_send()
 			 * to avoid unneccessary wakeups until there
 			 * is enough space for the PDU to fit.
 			 */
 			SOCKBUF_LOCK(&so->so_snd);
-			so->so_snd.sb_lowat = size;
+			available = sbspace(&so->so_snd);
+			if (available < size) {
+#if 1
+				ICL_DEBUG("no space to send; "
+				    "have %zd, need %zd",
+				    available, size);
+#endif
+				so->so_snd.sb_lowat = size;
+				SOCKBUF_UNLOCK(&so->so_snd);
+				return;
+			}
 			SOCKBUF_UNLOCK(&so->so_snd);
-			return;
 		}
 		STAILQ_REMOVE_HEAD(queue, ip_next);
 		error = icl_pdu_finalize(request);
@@ -1016,6 +1032,9 @@ icl_soupcall_send(struct socket *so, voi
 {
 	struct icl_conn *ic;
 
+	if (!sowriteable(so))
+		return (SU_OK);
+
 	ic = arg;
 
 	ICL_CONN_LOCK(ic);