Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 4 May 2011 21:27:05 +0000 (UTC)
From:      Michael Tuexen <tuexen@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r221460 - head/sys/netinet
Message-ID:  <201105042127.p44LR5Ho067929@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: tuexen
Date: Wed May  4 21:27:05 2011
New Revision: 221460
URL: http://svn.freebsd.org/changeset/base/221460

Log:
  Implement Resource Pooling V2 and an MPTCP-like congestion
  control.
  Based on a patch received from Martin Becke.
  
  MFC after: 2 weeks.

Modified:
  head/sys/netinet/sctp.h
  head/sys/netinet/sctp_cc_functions.c
  head/sys/netinet/sctp_sysctl.h
  head/sys/netinet/sctp_usrreq.c

Modified: head/sys/netinet/sctp.h
==============================================================================
--- head/sys/netinet/sctp.h	Wed May  4 20:29:40 2011	(r221459)
+++ head/sys/netinet/sctp.h	Wed May  4 21:27:05 2011	(r221460)
@@ -265,6 +265,13 @@ struct sctp_paramhdr {
 #define SCTP_CC_OPT_USE_DCCC_ECN	0x00002001
 #define SCTP_CC_OPT_STEADY_STEP         0x00002002
 
+#define SCTP_CMT_OFF            0
+#define SCTP_CMT_BASE           1
+#define SCTP_CMT_RPV1           2
+#define SCTP_CMT_RPV2           3
+#define SCTP_CMT_MPTCP          4
+#define SCTP_CMT_MAX            SCTP_CMT_MPTCP
+
 /* RS - Supported stream scheduling modules for pluggable
  * stream scheduling
  */

Modified: head/sys/netinet/sctp_cc_functions.c
==============================================================================
--- head/sys/netinet/sctp_cc_functions.c	Wed May  4 20:29:40 2011	(r221459)
+++ head/sys/netinet/sctp_cc_functions.c	Wed May  4 21:27:05 2011	(r221460)
@@ -47,6 +47,10 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#define SHIFT_MPTCP_MULTI_N 40
+#define SHIFT_MPTCP_MULTI_Z 16
+#define SHIFT_MPTCP_MULTI 8
+
 static void
 sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
@@ -67,7 +71,8 @@ sctp_set_initial_cc_param(struct sctp_tc
 			cwnd_in_mtu = assoc->max_burst;
 		net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
 	}
-	if (stcb->asoc.sctp_cmt_on_off == 2) {
+	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
+	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
 		/* In case of resource pooling initialize appropriately */
 		net->cwnd /= assoc->numnets;
 		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
@@ -91,14 +96,23 @@ sctp_cwnd_update_after_fr(struct sctp_tc
 {
 	struct sctp_nets *net;
 	uint32_t t_ssthresh, t_cwnd;
+	uint64_t t_ucwnd_sbw;
 
 	/* MT FIXME: Don't compute this over and over again */
 	t_ssthresh = 0;
 	t_cwnd = 0;
-	if (asoc->sctp_cmt_on_off == 2) {
+	t_ucwnd_sbw = 0;
+	if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
+	    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
 		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 			t_ssthresh += net->ssthresh;
 			t_cwnd += net->cwnd;
+			if (net->lastsa > 0) {
+				t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) net->lastsa;
+			}
+		}
+		if (t_ucwnd_sbw == 0) {
+			t_ucwnd_sbw = 1;
 		}
 	}
 	/*-
@@ -119,11 +133,37 @@ sctp_cwnd_update_after_fr(struct sctp_tc
 				struct sctp_tmit_chunk *lchk;
 				int old_cwnd = net->cwnd;
 
-				if (asoc->sctp_cmt_on_off == 2) {
-					net->ssthresh = (uint32_t) (((uint64_t) 4 *
-					    (uint64_t) net->mtu *
-					    (uint64_t) net->ssthresh) /
-					    (uint64_t) t_ssthresh);
+				if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
+				    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
+					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) {
+						net->ssthresh = (uint32_t) (((uint64_t) 4 *
+						    (uint64_t) net->mtu *
+						    (uint64_t) net->ssthresh) /
+						    (uint64_t) t_ssthresh);
+
+					}
+					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) {
+						uint32_t srtt;
+
+						srtt = net->lastsa;
+						/*
+						 * lastsa>>3;  we don't need
+						 * to divide ...
+						 */
+						if (srtt == 0) {
+							srtt = 1;
+						}
+						/*
+						 * Short Version => Equal to
+						 * Contel Version MBe
+						 */
+						net->ssthresh = (uint32_t) (((uint64_t) 4 *
+						    (uint64_t) net->mtu *
+						    (uint64_t) net->cwnd) /
+						    ((uint64_t) srtt *
+						    t_ucwnd_sbw));
+						 /* INCREASE FACTOR */ ;
+					}
 					if ((net->cwnd > t_cwnd / 2) &&
 					    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
 						net->ssthresh = net->cwnd - t_cwnd / 2;
@@ -629,14 +669,47 @@ sctp_cwnd_update_after_sack_common(struc
 	struct sctp_nets *net;
 	int old_cwnd;
 	uint32_t t_ssthresh, t_cwnd, incr;
+	uint64_t t_ucwnd_sbw;
+	uint64_t t_path_mptcp;
+	uint64_t mptcp_like_alpha;
+	uint32_t srtt;
+	uint64_t max_path;
 
 	/* MT FIXME: Don't compute this over and over again */
 	t_ssthresh = 0;
 	t_cwnd = 0;
-	if (stcb->asoc.sctp_cmt_on_off == 2) {
+	t_ucwnd_sbw = 0;
+	t_path_mptcp = 0;
+	mptcp_like_alpha = 1;
+	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
+	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) ||
+	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) {
+		max_path = 0;
 		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 			t_ssthresh += net->ssthresh;
 			t_cwnd += net->cwnd;
+			/* lastsa>>3;  we don't need to divide ... */
+			srtt = net->lastsa;
+			if (srtt > 0) {
+				uint64_t tmp;
+
+				t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) srtt;
+				t_path_mptcp += (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_Z) /
+				    (((uint64_t) net->mtu) * (uint64_t) srtt);
+				tmp = (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_N) /
+				    ((uint64_t) net->mtu * (uint64_t) (srtt * srtt));
+				if (tmp > max_path) {
+					max_path = tmp;
+				}
+			}
+		}
+		if (t_ucwnd_sbw == 0) {
+			t_ucwnd_sbw = 1;
+		}
+		if (t_path_mptcp > 0) {
+			mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp);
+		} else {
+			mptcp_like_alpha = 1;
 		}
 	}
 	/******************************/
@@ -818,10 +891,11 @@ sctp_cwnd_update_after_sack_common(struc
 			if (net->cwnd <= net->ssthresh) {
 				/* We are in slow start */
 				if (net->flight_size + net->net_ack >= net->cwnd) {
-					old_cwnd = net->cwnd;
-					if (stcb->asoc.sctp_cmt_on_off == 2) {
-						uint32_t limit;
+					uint32_t limit;
 
+					old_cwnd = net->cwnd;
+					switch (asoc->sctp_cmt_on_off) {
+					case SCTP_CMT_RPV1:
 						limit = (uint32_t) (((uint64_t) net->mtu *
 						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
 						    (uint64_t) net->ssthresh) /
@@ -835,11 +909,56 @@ sctp_cwnd_update_after_sack_common(struc
 						if (incr == 0) {
 							incr = 1;
 						}
-					} else {
+						break;
+					case SCTP_CMT_RPV2:
+						/*
+						 * lastsa>>3;  we don't need
+						 * to divide ...
+						 */
+						srtt = net->lastsa;
+						if (srtt == 0) {
+							srtt = 1;
+						}
+						limit = (uint32_t) (((uint64_t) net->mtu *
+						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
+						    (uint64_t) net->cwnd) /
+						    ((uint64_t) srtt * t_ucwnd_sbw));
+						/* INCREASE FACTOR */
+						incr = (uint32_t) (((uint64_t) net->net_ack *
+						    (uint64_t) net->cwnd) /
+						    ((uint64_t) srtt * t_ucwnd_sbw));
+						/* INCREASE FACTOR */
+						if (incr > limit) {
+							incr = limit;
+						}
+						if (incr == 0) {
+							incr = 1;
+						}
+						break;
+					case SCTP_CMT_MPTCP:
+						limit = (uint32_t) (((uint64_t) net->mtu *
+						    mptcp_like_alpha *
+						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >>
+						    SHIFT_MPTCP_MULTI);
+						incr = (uint32_t) (((uint64_t) net->net_ack *
+						    mptcp_like_alpha) >>
+						    SHIFT_MPTCP_MULTI);
+						if (incr > limit) {
+							incr = limit;
+						}
+						if (incr > net->net_ack) {
+							incr = net->net_ack;
+						}
+						if (incr > net->mtu) {
+							incr = net->mtu;
+						}
+						break;
+					default:
 						incr = net->net_ack;
 						if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) {
 							incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable);
 						}
+						break;
 					}
 					net->cwnd += incr;
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
@@ -868,15 +987,44 @@ sctp_cwnd_update_after_sack_common(struc
 				    (net->partial_bytes_acked >= net->cwnd)) {
 					net->partial_bytes_acked -= net->cwnd;
 					old_cwnd = net->cwnd;
-					if (asoc->sctp_cmt_on_off == 2) {
+					switch (asoc->sctp_cmt_on_off) {
+					case SCTP_CMT_RPV1:
 						incr = (uint32_t) (((uint64_t) net->mtu *
 						    (uint64_t) net->ssthresh) /
 						    (uint64_t) t_ssthresh);
 						if (incr == 0) {
 							incr = 1;
 						}
-					} else {
+						break;
+					case SCTP_CMT_RPV2:
+						/*
+						 * lastsa>>3;  we don't need
+						 * to divide ...
+						 */
+						srtt = net->lastsa;
+						if (srtt == 0) {
+							srtt = 1;
+						}
+						incr = (uint32_t) ((uint64_t) net->mtu *
+						    (uint64_t) net->cwnd /
+						    ((uint64_t) srtt *
+						    t_ucwnd_sbw));
+						/* INCREASE FACTOR */
+						if (incr == 0) {
+							incr = 1;
+						}
+						break;
+					case SCTP_CMT_MPTCP:
+						incr = (uint32_t) ((mptcp_like_alpha *
+						    (uint64_t) net->cwnd) >>
+						    SHIFT_MPTCP_MULTI);
+						if (incr > net->mtu) {
+							incr = net->mtu;
+						}
+						break;
+					default:
 						incr = net->mtu;
+						break;
 					}
 					net->cwnd += incr;
 					SDT_PROBE(sctp, cwnd, net, ack,
@@ -926,21 +1074,49 @@ sctp_cwnd_update_after_timeout(struct sc
 {
 	int old_cwnd = net->cwnd;
 	uint32_t t_ssthresh, t_cwnd;
+	uint64_t t_ucwnd_sbw;
 
 	/* MT FIXME: Don't compute this over and over again */
 	t_ssthresh = 0;
 	t_cwnd = 0;
-	if (stcb->asoc.sctp_cmt_on_off == 2) {
+	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
+	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
 		struct sctp_nets *lnet;
+		uint32_t srtt;
 
+		t_ucwnd_sbw = 0;
 		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
 			t_ssthresh += lnet->ssthresh;
 			t_cwnd += lnet->cwnd;
+			srtt = lnet->lastsa;
+			/* lastsa>>3;  we don't need to divide ... */
+			if (srtt > 0) {
+				t_ucwnd_sbw += (uint64_t) lnet->cwnd / (uint64_t) srtt;
+			}
+		}
+		if (t_ucwnd_sbw < 1) {
+			t_ucwnd_sbw = 1;
+		}
+		if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) {
+			net->ssthresh = (uint32_t) (((uint64_t) 4 *
+			    (uint64_t) net->mtu *
+			    (uint64_t) net->ssthresh) /
+			    (uint64_t) t_ssthresh);
+		} else {
+			uint64_t cc_delta;
+
+			srtt = net->lastsa;
+			/* lastsa>>3;  we don't need to divide ... */
+			if (srtt == 0) {
+				srtt = 1;
+			}
+			cc_delta = t_ucwnd_sbw * (uint64_t) srtt / 2;
+			if (cc_delta < t_cwnd) {
+				net->ssthresh = (uint32_t) ((uint64_t) t_cwnd - cc_delta);
+			} else {
+				net->ssthresh = net->mtu;
+			}
 		}
-		net->ssthresh = (uint32_t) (((uint64_t) 4 *
-		    (uint64_t) net->mtu *
-		    (uint64_t) net->ssthresh) /
-		    (uint64_t) t_ssthresh);
 		if ((net->cwnd > t_cwnd / 2) &&
 		    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
 			net->ssthresh = net->cwnd - t_cwnd / 2;

Modified: head/sys/netinet/sctp_sysctl.h
==============================================================================
--- head/sys/netinet/sctp_sysctl.h	Wed May  4 20:29:40 2011	(r221459)
+++ head/sys/netinet/sctp_sysctl.h	Wed May  4 21:27:05 2011	(r221460)
@@ -336,9 +336,9 @@ struct sctp_sysctl {
 
 /* cmt_on_off: CMT on/off flag */
 #define SCTPCTL_CMT_ON_OFF_DESC		"CMT settings"
-#define SCTPCTL_CMT_ON_OFF_MIN		0
-#define SCTPCTL_CMT_ON_OFF_MAX		2
-#define SCTPCTL_CMT_ON_OFF_DEFAULT	0
+#define SCTPCTL_CMT_ON_OFF_MIN		SCTP_CMT_OFF
+#define SCTPCTL_CMT_ON_OFF_MAX		SCTP_CMT_MAX
+#define SCTPCTL_CMT_ON_OFF_DEFAULT	SCTP_CMT_OFF
 
 /* EY - nr_sack_on_off: NR_SACK on/off flag */
 #define SCTPCTL_NR_SACK_ON_OFF_DESC	"NR_SACK on/off flag"

Modified: head/sys/netinet/sctp_usrreq.c
==============================================================================
--- head/sys/netinet/sctp_usrreq.c	Wed May  4 20:29:40 2011	(r221459)
+++ head/sys/netinet/sctp_usrreq.c	Wed May  4 21:27:05 2011	(r221460)
@@ -2992,18 +2992,22 @@ sctp_setopt(struct socket *so, int optna
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
-				stcb->asoc.sctp_cmt_on_off = av->assoc_value;
-				if (stcb->asoc.sctp_cmt_on_off > 2) {
-					stcb->asoc.sctp_cmt_on_off = 2;
+				if (av->assoc_value > SCTP_CMT_MAX) {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				} else {
+					stcb->asoc.sctp_cmt_on_off = av->assoc_value;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
-				SCTP_INP_WLOCK(inp);
-				inp->sctp_cmt_on_off = av->assoc_value;
-				if (inp->sctp_cmt_on_off > 2) {
-					inp->sctp_cmt_on_off = 2;
+				if (av->assoc_value > SCTP_CMT_MAX) {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				} else {
+					SCTP_INP_WLOCK(inp);
+					inp->sctp_cmt_on_off = av->assoc_value;
+					SCTP_INP_WUNLOCK(inp);
 				}
-				SCTP_INP_WUNLOCK(inp);
 			}
 		} else {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201105042127.p44LR5Ho067929>