Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 30 Jan 2014 22:32:32 -0500 (EST)
From:      Rick Macklem <rmacklem@uoguelph.ca>
To:        Adrian Chadd <adrian@freebsd.org>
Cc:        FreeBSD Net <freebsd-net@freebsd.org>
Subject:   Re: 64K NFS I/O generates a 34mbuf list for TCP which breaks TSO
Message-ID:  <1856284835.584005.1391139152133.JavaMail.root@uoguelph.ca>
In-Reply-To: <CAJ-VmonGR-KQBGyKCUn=k8PDH3skB5N8br3JyDTD27%2Bz=UwJJw@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
Adrian Chadd wrote:
> On 30 January 2014 07:06, Rick Macklem <rmacklem@uoguelph.ca> wrote:
> > Hi, just adding one more idea on what to do about this
> > to the list:
> > - Add an if_hw_tsomaxseg and modify the loop in tcp_output()
> >   so that it uses both if_hw_tsomax and if_hw_tsomaxseg to
> >   decide how much to hand to the device driver in each mbuf list.
> >   (I haven't looked to see how easy it would be to change this
> >   loop.)
> 
> I don't think that's a hack. I think adding that and setting
> tsomaxseg to, say, 30 for now would be a good compromise.
> 
Well, my TCP is very rusty and I have no way to test it (I don't
have anything that does TSO), but I've attached a stab at a patch
to do this.

Maybe it can be used as a starting point for this, if others think
it makes sense.

What I was thinking is that the "#ifdef notyet" in the patch would
become something like:
#if __FreeBSD_version >= NNNN
once a change that adds if_hw_tsomaxseg (spelled if_hw_tsomaxsegs in
the patch) has been committed.

rick

> 
> 
> -a
> _______________________________________________
> freebsd-net@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to
> "freebsd-net-unsubscribe@freebsd.org"
> 

[-- Attachment #2 --]
--- kern/uipc_sockbuf.c.sav	2014-01-30 20:27:17.000000000 -0500
+++ kern/uipc_sockbuf.c	2014-01-30 22:12:08.000000000 -0500
@@ -965,6 +965,39 @@ sbsndptr(struct sockbuf *sb, u_int off, 
 }
 
 /*
+ * Return the mbuf holding offset off; *first_len = bytes in it from off onward.
+ */
+struct mbuf *
+sbsndmbuf(struct sockbuf *sb, u_int off, long *first_len)
+{
+	struct mbuf *m;
+
+	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
+
+	*first_len = 0;		/* stays 0 when NULL is returned */
+	/*
+	 * Is off below the stored sb_sndptroff? Happens on retransmits.
+	 * If so (or if sb_sndptr is not set), start the walk at sb_mb.
+	 */
+	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off)
+		m = sb->sb_mb;
+	else {
+		m = sb->sb_sndptr;
+		off -= sb->sb_sndptroff;	/* make off relative to sb_sndptr */
+	}
+	while (off > 0 && m != NULL) {	/* skip mbufs that end at or before off */
+		if (off < m->m_len)
+			break;		/* off falls inside this mbuf */
+		off -= m->m_len;
+		m = m->m_next;
+	}
+	if (m != NULL)
+		*first_len = m->m_len - off;	/* bytes of m from off to its end */
+
+	return (m);
+}
+
+/*
  * Drop a record off the front of a sockbuf and move the next record to the
  * front.
  */
--- sys/sockbuf.h.sav	2014-01-30 20:42:28.000000000 -0500
+++ sys/sockbuf.h	2014-01-30 22:08:43.000000000 -0500
@@ -153,6 +153,8 @@ int	sbreserve_locked(struct sockbuf *sb,
 	    struct thread *td);
 struct mbuf *
 	sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff);
+struct mbuf *
+	sbsndmbuf(struct sockbuf *sb, u_int off, long *first_len);
 void	sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
 int	sbwait(struct sockbuf *sb);
 int	sblock(struct sockbuf *sb, int flags);
--- netinet/tcp_input.c.sav	2014-01-30 19:37:52.000000000 -0500
+++ netinet/tcp_input.c	2014-01-30 19:39:07.000000000 -0500
@@ -3627,6 +3627,7 @@ tcp_mss(struct tcpcb *tp, int offer)
 	if (cap.ifcap & CSUM_TSO) {
 		tp->t_flags |= TF_TSO;
 		tp->t_tsomax = cap.tsomax;
+		tp->t_tsomaxsegs = cap.tsomaxsegs;
 	}
 }
 
--- netinet/tcp_output.c.sav	2014-01-30 18:55:15.000000000 -0500
+++ netinet/tcp_output.c	2014-01-30 22:18:56.000000000 -0500
@@ -166,8 +166,8 @@ int
 tcp_output(struct tcpcb *tp)
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
-	long len, recwin, sendwin;
-	int off, flags, error = 0;	/* Keep compiler happy */
+	long len, recwin, sendwin, tso_tlen;
+	int cnt, off, flags, error = 0;	/* Keep compiler happy */
 	struct mbuf *m;
 	struct ip *ip = NULL;
 	struct ipovly *ipov = NULL;
@@ -780,6 +780,24 @@ send:
 			}
 
 			/*
+			 * Limit the number of TSO transmit segments (mbufs
+			 * in mbuf list) to tp->t_tsomaxsegs.
+			 */
+			cnt = 0;
+			m = sbsndmbuf(&so->so_snd, off, &tso_tlen);
+			while (m != NULL && cnt < tp->t_tsomaxsegs &&
+			    tso_tlen < len) {
+				if (cnt > 0)
+					tso_tlen += m->m_len;
+				cnt++;
+				m = m->m_next;
+			}
+			if (m != NULL && tso_tlen < len) {
+				len = tso_tlen;
+				sendalot = 1;
+			}
+
+			/*
 			 * Prevent the last segment from being
 			 * fractional unless the send sockbuf can
 			 * be emptied.
--- netinet/tcp_subr.c.sav	2014-01-30 19:44:35.000000000 -0500
+++ netinet/tcp_subr.c	2014-01-30 20:56:12.000000000 -0500
@@ -1800,6 +1800,12 @@ tcp_maxmtu(struct in_conninfo *inc, stru
 			    ifp->if_hwassist & CSUM_TSO)
 				cap->ifcap |= CSUM_TSO;
 				cap->tsomax = ifp->if_hw_tsomax;
+#ifdef notyet
+				cap->tsomaxsegs = ifp->if_hw_tsomaxsegs;
+#endif
+				if (cap->tsomaxsegs == 0)
+					cap->tsomaxsegs =
+					    TCPTSO_MAX_TX_SEGS_DEFAULT;
 		}
 		RTFREE(sro.ro_rt);
 	}
--- netinet/tcp_var.h.sav	2014-01-30 19:39:22.000000000 -0500
+++ netinet/tcp_var.h	2014-01-30 20:52:57.000000000 -0500
@@ -209,6 +209,7 @@ struct tcpcb {
 	u_int	t_keepcnt;		/* number of keepalives before close */
 
 	u_int	t_tsomax;		/* tso burst length limit */
+	u_int	t_tsomaxsegs;		/* tso burst segment limit */
 
 	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
 	void	*t_pspare2[4];		/* 4 TBD */
@@ -268,6 +269,11 @@ struct tcpcb {
 #define	TCPOOB_HAVEDATA	0x01
 #define	TCPOOB_HADDATA	0x02
 
+/*
+ * Default TSO limit on transmit segments (mbufs per chain) when none is set.
+ */
+#define	TCPTSO_MAX_TX_SEGS_DEFAULT	30
+
 #ifdef TCP_SIGNATURE
 /*
  * Defines which are needed by the xform_tcp module and tcp_[in|out]put
@@ -333,6 +339,7 @@ struct hc_metrics_lite {	/* must stay in
 struct tcp_ifcap {
 	int	ifcap;
 	u_int	tsomax;
+	u_int	tsomaxsegs;	/* tso burst segment limit */
 };
 
 #ifndef _NETINET_IN_PCB_H_

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1856284835.584005.1391139152133.JavaMail.root>