From owner-svn-src-all@FreeBSD.ORG Wed May 7 07:17:12 2014 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 469B9501; Wed, 7 May 2014 07:17:12 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 3049DECA; Wed, 7 May 2014 07:17:12 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.8/8.14.8) with ESMTP id s477HBcw011053; Wed, 7 May 2014 07:17:11 GMT (envelope-from trasz@svn.freebsd.org) Received: (from trasz@localhost) by svn.freebsd.org (8.14.8/8.14.8/Submit) id s477HBq9011049; Wed, 7 May 2014 07:17:11 GMT (envelope-from trasz@svn.freebsd.org) Message-Id: <201405070717.s477HBq9011049@svn.freebsd.org> From: Edward Tomasz Napierala Date: Wed, 7 May 2014 07:17:11 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r265502 - stable/10/sys/dev/iscsi X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 07 May 2014 07:17:12 -0000 Author: trasz Date: Wed May 7 07:17:11 2014 New Revision: 265502 URL: http://svnweb.freebsd.org/changeset/base/265502 Log: MFC r264122: Rework the iSCSI PDU transmit code to avoid lock contention and coalesce PDUs before sending. Sponsored by: The FreeBSD Foundation Modified: stable/10/sys/dev/iscsi/icl.c stable/10/sys/dev/iscsi/icl.h Directory Properties: stable/10/ (props changed) Modified: stable/10/sys/dev/iscsi/icl.c ============================================================================== --- stable/10/sys/dev/iscsi/icl.c Wed May 7 06:46:59 2014 (r265501) +++ stable/10/sys/dev/iscsi/icl.c Wed May 7 07:17:11 2014 (r265502) @@ -63,6 +63,10 @@ static int debug = 1; TUNABLE_INT("kern.icl.debug", &debug); SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN, &debug, 1, "Enable debug messages"); +static int coalesce = 1; +TUNABLE_INT("kern.icl.coalesce", &coalesce); +SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, + &coalesce, 1, "Try to coalesce PDUs before sending"); static int partial_receive_len = 1 * 1024; /* XXX: More? */ TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len); SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, @@ -769,18 +773,14 @@ icl_soupcall_receive(struct socket *so, } static int -icl_pdu_send(struct icl_pdu *request) +icl_pdu_finalize(struct icl_pdu *request) { size_t padding, pdu_len; uint32_t digest, zero = 0; - int error, ok; - struct socket *so; + int ok; struct icl_conn *ic; ic = request->ip_conn; - so = request->ip_conn->ic_socket; - - ICL_CONN_LOCK_ASSERT(ic); icl_pdu_set_data_segment_length(request, request->ip_data_len); @@ -813,7 +813,7 @@ icl_pdu_send(struct icl_pdu *request) ok = m_append(request->ip_data_mbuf, sizeof(digest), (void *)&digest); if (ok != 1) { - ICL_WARN("failed to append header digest"); + ICL_WARN("failed to append data digest"); return (1); } } @@ -824,64 +824,114 @@ icl_pdu_send(struct icl_pdu *request) request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; - error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, - NULL, MSG_DONTWAIT, curthread); - request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ - if (error != 0) { - ICL_DEBUG("sosend error %d", error); - return (error); - } - return (0); } static void -icl_conn_send_pdus(struct icl_conn *ic) +icl_conn_send_pdus(struct icl_conn *ic, void *fts) { - struct icl_pdu *request; + STAILQ_HEAD(, icl_pdu) *queue = fts; /* XXX */ + struct icl_pdu *request, *request2; struct socket *so; - size_t available, size; - int error; + size_t available, size, size2; + int coalesced, error; - ICL_CONN_LOCK_ASSERT(ic); + ICL_CONN_LOCK_ASSERT_NOT(ic); so = ic->ic_socket; SOCKBUF_LOCK(&so->so_snd); + /* + * Check how much space do we have for transmit. We can't just + * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, + * as it always frees the mbuf chain passed to it, even in case + * of error. + */ available = sbspace(&so->so_snd); + + /* + * Notify the socket layer that it doesn't need to call + * send socket upcall for the time being. + */ + so->so_snd.sb_lowat = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); - while (!STAILQ_EMPTY(&ic->ic_to_send)) { + while (!STAILQ_EMPTY(queue)) { if (ic->ic_disconnecting) return; - - request = STAILQ_FIRST(&ic->ic_to_send); + request = STAILQ_FIRST(queue); size = icl_pdu_size(request); if (available < size) { +#if 1 + ICL_DEBUG("no space to send; " + "have %zd, need %zd", + available, size); +#endif + /* * Set the low watermark on the socket, - * to avoid waking up until there is enough - * space. + * to avoid unneccessary wakeups until there + * is enough space for the PDU to fit. */ SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_lowat = size; SOCKBUF_UNLOCK(&so->so_snd); -#if 1 - ICL_DEBUG("no space to send; " - "have %zd, need %zd", - available, size); -#endif return; } - available -= size; - STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); - error = icl_pdu_send(request); + STAILQ_REMOVE_HEAD(queue, ip_next); + error = icl_pdu_finalize(request); if (error != 0) { - ICL_DEBUG("failed to send PDU; " + ICL_DEBUG("failed to finalize PDU; " "dropping connection"); icl_conn_fail(ic); + icl_pdu_free(request); return; - } + } + if (coalesce) { + coalesced = 1; + for (;;) { + request2 = STAILQ_FIRST(queue); + if (request2 == NULL) + break; + size2 = icl_pdu_size(request2); + if (available < size + size2) + break; + STAILQ_REMOVE_HEAD(queue, ip_next); + error = icl_pdu_finalize(request2); + if (error != 0) { + ICL_DEBUG("failed to finalize PDU; " + "dropping connection"); + icl_conn_fail(ic); + icl_pdu_free(request); + icl_pdu_free(request2); + return; + } + m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); + request2->ip_bhs_mbuf = NULL; + request->ip_bhs_mbuf->m_pkthdr.len += size2; + size += size2; + STAILQ_REMOVE_AFTER(queue, request, ip_next); + icl_pdu_free(request2); + coalesced++; + } +#if 0 + if (coalesced > 1) { + ICL_DEBUG("coalesced %d PDUs into %zd bytes", + coalesced, size); + } +#endif + } + available -= size; + error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, + NULL, MSG_DONTWAIT, curthread); + request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ + if (error != 0) { + ICL_DEBUG("failed to send PDU, error %d; " + "dropping connection", error); + icl_conn_fail(ic); + icl_pdu_free(request); + return; + } icl_pdu_free(request); } } @@ -890,9 +940,12 @@ static void icl_send_thread(void *arg) { struct icl_conn *ic; + STAILQ_HEAD(, icl_pdu) queue; ic = arg; + STAILQ_INIT(&queue); + ICL_CONN_LOCK(ic); ic->ic_send_running = true; @@ -901,10 +954,54 @@ icl_send_thread(void *arg) //ICL_DEBUG("terminating"); break; } - icl_conn_send_pdus(ic); + + for (;;) { + /* + * If the local queue is empty, populate it from + * the main one. This way the icl_conn_send_pdus() + * can go through all the queued PDUs without holding + * any locks. + */ + if (STAILQ_EMPTY(&queue)) + STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); + + ic->ic_check_send_space = false; + ICL_CONN_UNLOCK(ic); + icl_conn_send_pdus(ic, &queue); + ICL_CONN_LOCK(ic); + + /* + * The icl_soupcall_send() was called since the last + * call to sbspace(); go around; + */ + if (ic->ic_check_send_space) + continue; + + /* + * Local queue is empty, but we still have PDUs + * in the main one; go around. + */ + if (STAILQ_EMPTY(&queue) && + !STAILQ_EMPTY(&ic->ic_to_send)) + continue; + + /* + * There might be some stuff in the local queue, + * which didn't get sent due to not having enough send + * space. Wait for socket upcall. + */ + break; + } + cv_wait(&ic->ic_send_cv, ic->ic_lock); } + /* + * We're exiting; move PDUs back to the main queue, so they can + * get freed properly. At this point ordering doesn't matter. + */ + STAILQ_CONCAT(&ic->ic_to_send, &queue); + ic->ic_send_running = false; ICL_CONN_UNLOCK(ic); kthread_exit(); @@ -916,12 +1013,19 @@ icl_soupcall_send(struct socket *so, voi struct icl_conn *ic; ic = arg; + + ICL_CONN_LOCK(ic); + ic->ic_check_send_space = true; + ICL_CONN_UNLOCK(ic); + cv_signal(&ic->ic_send_cv); + return (SU_OK); } int -icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, int flags) +icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, + int flags) { struct mbuf *mb, *newmb; size_t copylen, off = 0; @@ -974,6 +1078,17 @@ icl_pdu_queue(struct icl_pdu *ip) icl_pdu_free(ip); return; } + + if (!STAILQ_EMPTY(&ic->ic_to_send)) { + STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); + /* + * If the queue is not empty, someone else had already + * signaled the send thread; no need to do that again, + * just return. + */ + return; + } + STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); cv_signal(&ic->ic_send_cv); } @@ -1185,6 +1300,20 @@ icl_conn_close(struct icl_conn *ic) return; } + /* + * Deregister socket upcalls. + */ + ICL_CONN_UNLOCK(ic); + SOCKBUF_LOCK(&ic->ic_socket->so_snd); + if (ic->ic_socket->so_snd.sb_upcall != NULL) + soupcall_clear(ic->ic_socket, SO_SND); + SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); + SOCKBUF_LOCK(&ic->ic_socket->so_rcv); + if (ic->ic_socket->so_rcv.sb_upcall != NULL) + soupcall_clear(ic->ic_socket, SO_RCV); + SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); + ICL_CONN_LOCK(ic); + ic->ic_disconnecting = true; /* @@ -1202,7 +1331,9 @@ icl_conn_close(struct icl_conn *ic) } //ICL_DEBUG("send/receive threads terminated"); + ICL_CONN_UNLOCK(ic); soclose(ic->ic_socket); + ICL_CONN_LOCK(ic); ic->ic_socket = NULL; if (ic->ic_receive_pdu != NULL) { Modified: stable/10/sys/dev/iscsi/icl.h ============================================================================== --- stable/10/sys/dev/iscsi/icl.h Wed May 7 06:46:59 2014 (r265501) +++ stable/10/sys/dev/iscsi/icl.h Wed May 7 07:17:11 2014 (r265502) @@ -80,6 +80,7 @@ struct icl_conn { volatile u_int ic_outstanding_pdus; #endif STAILQ_HEAD(, icl_pdu) ic_to_send; + bool ic_check_send_space; size_t ic_receive_len; int ic_receive_state; struct icl_pdu *ic_receive_pdu;