Date: Wed, 25 Mar 2015 14:49:20 +0000 From: "Scheffenegger, Richard" <rs@netapp.com> To: "freebsd-net@freebsd.org" <freebsd-net@freebsd.org> Subject: TCP SACK improvements (RFC6675 rescue retransmission and lost retransmission detection) Message-ID: <046c2d4c51964cb698d0d27f1b8d0451@hioexcmbx05-prd.hq.netapp.com>
next in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
Hi,
I hope this is the correct forum to ask for help improving a rather crude patch to introduce RFC6675 Rescue Retransmissions and efficient Lost Retransmission Detection. Note that this is not a full implementation of RFC6675.
The patch that I have is against 8.0, but I believe the SACK scoreboard has not really changed, so the patch should still be applicable.
One outstanding issue with the patch is the missing interaction with the congestion control code: retransmitting a lost retransmission should reduce cwnd (once per cycle).
Also, the implementation is not very efficient, as additional traversals of the scoreboard are done to check the eligibility of prior holes for being retransmitted once more...
Best regards,
Richard Scheffenegger
[-- Attachment #2 --]
--- ../netinet.orig/tcp_sack.c 2010-09-10 23:39:25.000000000 +0200
+++ ../netinet/tcp_sack.c 2010-09-23 20:37:04.000000000 +0200
@@ -340,10 +340,52 @@
/* Free this SACK hole. */
tcp_sackhole_free(tp, hole);
}
/*
+ * Calculate the sequence number, which when SACKed will indicate a
+ * lost retransmission.
+ */
+tcp_seq
+tcp_sack_doneby(int bytes, struct tcpcb *tp, struct sackhole *p)
+{
+ while ((p != NULL) && (bytes >= 0)) { /* stop at end of list or once bytes goes negative */
+ p = TAILQ_NEXT(p, scblink); /* advance first: the starting hole itself is never charged */
+ if (p != NULL) {
+ bytes -= (p->end - p->start); /* charge this hole's length against bytes */
+ if ((bytes >= 0) && // RFC6675 "rescue retransmission"
+ (TAILQ_NEXT(p, scblink) == NULL))
+ bytes = -min(p->end - p->start, tp->t_maxseg); /* last hole, budget left: offset is hole length capped at t_maxseg */
+ }
+ }
+ if (p == NULL) {
+ return tp->snd_nxt; /* walked past the last hole in the list */
+ } else {
+ return (p->end + bytes); /* loop ended with bytes < 0, so this lies before p->end */
+ }
+}
+
+/*
+ * Check whether a given sequence number has been SACKed or not.
+ */
+int
+tcp_sack_ishole(tcp_seq seq, struct tcpcb *tp, struct sackhole *p)
+{
+ int b = 0; /* result: 1 if seq is treated as being in a hole, 0 otherwise */
+ if (SEQ_GEQ(seq, tp->snd_fack)) {
+ return 1; /* seq at or beyond snd_fack is always reported as a hole */
+ }
+ while (p != NULL) { /* scan from hole p toward the tail of the list */
+ if (SEQ_LT(seq, p->start))
+ break; /* seq precedes this hole; NOTE(review): early exit assumes holes are in ascending sequence order */
+ if (SEQ_GEQ(seq, p->start) &&
+ SEQ_LT(seq, p->end)) {
+ b = 1; /* seq falls inside [start, end) of this hole */
+ break;
+ }
+ p = TAILQ_NEXT(p, scblink);
+ }
+ return b;
+}
+
+/*
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
*/
void
@@ -480,11 +522,15 @@
* previous hole.
*/
cur = TAILQ_PREV(cur, sackhole_head, scblink);
continue;
}
- tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
+ /*****
+ * lost again
+ *****/
+ tp->sackhint.sack_bytes_rexmit -= (
+ SEQ_MIN(cur->rxmit, cur->end) - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
if (SEQ_LEQ(sblkp->start, cur->start)) {
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
@@ -506,11 +552,17 @@
} else {
/* Data acks at least the end of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
/* Move end of hole backward. */
cur->end = sblkp->start;
- cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
+ /*****
+ * lost again
+ *****/
+ if (SEQ_GEQ(cur->rxmit, cur->end)) {
+ cur->rxmit = tcp_sack_doneby(
+ tcprexmtthresh * tp->t_maxseg, tp, cur);
+ }
} else {
/*
* ACKs some data in middle of a hole; need
* to split current hole
*/
@@ -522,26 +574,47 @@
tp->sackhint.sack_bytes_rexmit
+= (temp->rxmit
- temp->start);
}
cur->end = sblkp->start;
- cur->rxmit = SEQ_MIN(cur->rxmit,
- cur->end);
+ /*****
+ * lost again
+ *****/
+ if (SEQ_GEQ(cur->rxmit, cur->end)) {
+ cur->rxmit = tcp_sack_doneby(
+ tcprexmtthresh * tp->t_maxseg, tp, cur);
#
#
#
+ }
}
}
}
- tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start);
+ /*****
+ * lost again
+ *****/
+ tp->sackhint.sack_bytes_rexmit +=
+ (SEQ_MIN(cur->rxmit, cur->end) - cur->start);
/*
* Testing sblkp->start against cur->start tells us whether
* we're done with the sack block or the sack hole.
* Accordingly, we advance one or the other.
*/
if (SEQ_LEQ(sblkp->start, cur->start))
cur = TAILQ_PREV(cur, sackhole_head, scblink);
else
sblkp--;
}
+ /*****
+ * lost again
+ *****/
+ if ((temp = tp->sackhint.nexthole) != NULL) {
+ do {
+ if (!(tcp_sack_ishole(temp->rxmit, tp, temp))) {
+ temp->rxmit = temp->start;
+ tp->sackhint.nexthole = temp;
+ //TCPSTAT_INC(tcps_sack_rexmit_lost);
+ }
+ } while ((temp = TAILQ_PREV(temp, sackhole_head, scblink)) != NULL);
+ }
}
/*
* Free all SACK holes to clear the scoreboard.
*/
--- ../netinet.orig/tcp_var.h 2010-09-10 23:39:25.000000000 +0200
+++ ../netinet/tcp_var.h 2010-09-19 20:59:42.000000000 +0200
@@ -36,10 +36,12 @@
#include <netinet/tcp.h>
#ifdef _KERNEL
#include <net/vnet.h>
+static const int tcprexmtthresh = 3;
+
/*
* Kernel variables for tcp.
*/
VNET_DECLARE(int, tcp_do_rfc1323);
VNET_DECLARE(int, tcp_reass_qsize);
@@ -457,10 +459,11 @@
/* SACK related stats */
u_long tcps_sack_recovery_episode; /* SACK recovery episodes */
u_long tcps_sack_rexmits; /* SACK rexmit segments */
u_long tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
+// u_long tcps_sack_rexmit_lost; /* SACK lost rexmit segments */
u_long tcps_sack_rcv_blocks; /* SACK blocks (options) received */
u_long tcps_sack_send_blocks; /* SACK blocks (options) sent */
u_long tcps_sack_sboverflow; /* times scoreboard overflowed */
/* ECN related stats */
@@ -699,10 +702,12 @@
extern struct pr_usrreqs tcp_usrreqs;
extern u_long tcp_sendspace;
extern u_long tcp_recvspace;
tcp_seq tcp_new_isn(struct tcpcb *);
+tcp_seq tcp_sack_doneby(int, struct tcpcb *, struct sackhole *);
+int tcp_sack_ishole(tcp_seq, struct tcpcb *, struct sackhole *);
void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
--- ../netinet.orig/tcp_input.c 2010-09-11 22:38:53.000000000 +0200
+++ ../netinet/tcp_input.c 2010-09-15 19:08:41.000000000 +0200
@@ -94,12 +94,10 @@
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
-static const int tcprexmtthresh = 3;
-
VNET_DEFINE(struct tcpstat, tcpstat);
VNET_DEFINE(int, blackhole);
VNET_DEFINE(int, tcp_delack_enabled);
VNET_DEFINE(int, drop_synfin);
VNET_DEFINE(int, tcp_do_rfc3042);
--- ../netinet.orig/tcp_output.c 2010-09-10 23:39:25.000000000 +0200
+++ ../netinet/tcp_output.c 2010-09-15 19:03:59.000000000 +0200
@@ -951,10 +951,17 @@
else
th->th_seq = htonl(tp->snd_max);
} else {
th->th_seq = htonl(p->rxmit);
p->rxmit += len;
+ /*****
+ * lost again
+ *****/
+ if (SEQ_GEQ(p->rxmit, p->end)) {
+ p->rxmit = tcp_sack_doneby(
+ tcprexmtthresh * tp->t_maxseg, tp, p);
+ }
tp->sackhint.sack_bytes_rexmit += len;
}
th->th_ack = htonl(tp->rcv_nxt);
if (optlen) {
bcopy(opt, th + 1, optlen);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?046c2d4c51964cb698d0d27f1b8d0451>
